/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>

#include <drm/drm_vblank.h>

#include "atom.h"
#include "evergreen.h"
#include "cik_blit_shaders.h"
#include "cik.h"
#include "cikd.h"
#include "clearstate_ci.h"
#include "r600.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "radeon_ucode.h"
#include "si.h"
#include "vce.h"

#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register.
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}
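
/*
 * Usage sketch (illustrative only, not part of the driver): the info
 * ioctl path uses this helper to whitelist status register reads, e.g.
 *
 *	u32 val;
 *
 *	if (!cik_get_allowed_info_register(rdev, GRBM_STATUS, &val))
 *		DRM_INFO("GRBM_STATUS: 0x%08x\n", val);
 */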

/*
 * DIDT indirect register accessors.  The DIDT block sits behind a
 * CIK_DIDT_IND_INDEX/CIK_DIDT_IND_DATA index/data pair, so the spinlock
 * keeps the two-step access sequence atomic.
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}

/* get temperature in millidegrees Celsius (CI dGPUs) */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	return actual_temp * 1000;
}
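
/*
 * Worked example (sketch): a CTF_TEMP field of 0x060 decodes to 96 and is
 * returned as 96000 millidegrees; any reading with bit 9 set is treated
 * as out of range and clamped to 255 C.
 */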

/* get temperature in millidegrees Celsius (KV/KB APUs) */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	/* raw SMC thermal status register; offset is hardcoded upstream */
	temp = RREG32_SMC(0xC0300E0C);

	/* per the arithmetic: raw count in 1/8 degree units, 49 C offset */
	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	return actual_temp * 1000;
}

/*
 * PCIE port indirect register accessors.  The dummy readbacks of
 * PCIE_INDEX/PCIE_DATA flush the posted writes before the lock drops.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

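/*
 * RLC save/restore lists: each entry below appears to be a pair of
 * ((instance select << 16) | (register offset >> 2)) followed by a 32-bit
 * default value, with the bare words (0x3, 0x5) acting as count markers
 * between sub-lists.  This layout is inferred from the data itself.
 */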
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

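/*
 * The golden register tables below are { offset, AND mask, value } triplets
 * consumed by radeon_program_register_sequence(): the masked bits are
 * cleared and (value & mask) is ORed back in, while a mask of 0xffffffff
 * writes the value outright.
 */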
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
1718  
1719  /**
1720   * cik_mm_rdoorbell - read a doorbell dword
1721   *
1722   * @rdev: radeon_device pointer
1723   * @index: doorbell index
1724   *
1725   * Returns the value in the doorbell aperture at the
1726   * requested doorbell index (CIK).
1727   */
1728  u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1729  {
1730  	if (index < rdev->doorbell.num_doorbells) {
1731  		return readl(rdev->doorbell.ptr + index);
1732  	} else {
1733  		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1734  		return 0;
1735  	}
1736  }
1737  
1738  /**
1739   * cik_mm_wdoorbell - write a doorbell dword
1740   *
1741   * @rdev: radeon_device pointer
1742   * @index: doorbell index
1743   * @v: value to write
1744   *
1745   * Writes @v to the doorbell aperture at the
1746   * requested doorbell index (CIK).
1747   */
1748  void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1749  {
1750  	if (index < rdev->doorbell.num_doorbells) {
1751  		writel(v, rdev->doorbell.ptr + index);
1752  	} else {
1753  		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1754  	}
1755  }
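
/*
 * Illustrative use (hypothetical ring fields): after bumping a compute
 * ring's write pointer, the new wptr is rung through its doorbell:
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
 *
 * Note that @index is in dwords: doorbell.ptr is a u32 __iomem pointer,
 * so the pointer arithmetic above advances in 4-byte steps.
 */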
1756  
1757  #define BONAIRE_IO_MC_REGS_SIZE 36
1758  
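/*
 * Legacy (headerless) MC firmware ships without its IO debug register
 * list, so the {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs below
 * are kept in the driver and written before the ucode itself is loaded.
 */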
1759  static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1760  {
1761  	{0x00000070, 0x04400000},
1762  	{0x00000071, 0x80c01803},
1763  	{0x00000072, 0x00004004},
1764  	{0x00000073, 0x00000100},
1765  	{0x00000074, 0x00ff0000},
1766  	{0x00000075, 0x34000000},
1767  	{0x00000076, 0x08000014},
1768  	{0x00000077, 0x00cc08ec},
1769  	{0x00000078, 0x00000400},
1770  	{0x00000079, 0x00000000},
1771  	{0x0000007a, 0x04090000},
1772  	{0x0000007c, 0x00000000},
1773  	{0x0000007e, 0x4408a8e8},
1774  	{0x0000007f, 0x00000304},
1775  	{0x00000080, 0x00000000},
1776  	{0x00000082, 0x00000001},
1777  	{0x00000083, 0x00000002},
1778  	{0x00000084, 0xf3e4f400},
1779  	{0x00000085, 0x052024e3},
1780  	{0x00000087, 0x00000000},
1781  	{0x00000088, 0x01000000},
1782  	{0x0000008a, 0x1c0a0000},
1783  	{0x0000008b, 0xff010000},
1784  	{0x0000008d, 0xffffefff},
1785  	{0x0000008e, 0xfff3efff},
1786  	{0x0000008f, 0xfff3efbf},
1787  	{0x00000092, 0xf7ffffff},
1788  	{0x00000093, 0xffffff7f},
1789  	{0x00000095, 0x00101101},
1790  	{0x00000096, 0x00000fff},
1791  	{0x00000097, 0x00116fff},
1792  	{0x00000098, 0x60010000},
1793  	{0x00000099, 0x10010000},
1794  	{0x0000009a, 0x00006000},
1795  	{0x0000009b, 0x00001000},
1796  	{0x0000009f, 0x00b48000}
1797  };
1798  
1799  #define HAWAII_IO_MC_REGS_SIZE 22
1800  
1801  static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1802  {
1803  	{0x0000007d, 0x40000000},
1804  	{0x0000007e, 0x40180304},
1805  	{0x0000007f, 0x0000ff00},
1806  	{0x00000081, 0x00000000},
1807  	{0x00000083, 0x00000800},
1808  	{0x00000086, 0x00000000},
1809  	{0x00000087, 0x00000100},
1810  	{0x00000088, 0x00020100},
1811  	{0x00000089, 0x00000000},
1812  	{0x0000008b, 0x00040000},
1813  	{0x0000008c, 0x00000100},
1814  	{0x0000008e, 0xff010000},
1815  	{0x00000090, 0xffffefff},
1816  	{0x00000091, 0xfff3efff},
1817  	{0x00000092, 0xfff3efbf},
1818  	{0x00000093, 0xf7ffffff},
1819  	{0x00000094, 0xffffff7f},
1820  	{0x00000095, 0x00000fff},
1821  	{0x00000096, 0x00116fff},
1822  	{0x00000097, 0x60010000},
1823  	{0x00000098, 0x10010000},
1824  	{0x0000009f, 0x00c79000}
1825  };
1826  
1828  /**
1829   * cik_srbm_select - select specific register instances
1830   *
1831   * @rdev: radeon_device pointer
1832   * @me: selected ME (micro engine)
1833   * @pipe: pipe
1834   * @queue: queue
1835   * @vmid: VMID
1836   *
1837   * Switches the currently active register instances.  Some
1838   * registers are instanced per VMID, others are instanced per
1839   * me/pipe/queue combination.
1840   */
1841  static void cik_srbm_select(struct radeon_device *rdev,
1842  			    u32 me, u32 pipe, u32 queue, u32 vmid)
1843  {
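	/*
	 * SRBM_GFX_CNTL packs the whole selection into one register; per
	 * the cikd.h macros, PIPEID sits in the low bits, followed by
	 * MEID, VMID and QUEUEID.  Callers are expected to serialize the
	 * select/access/deselect sequence (the driver uses
	 * rdev->srbm_mutex for this).
	 */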
1844  	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1845  			     MEID(me & 0x3) |
1846  			     VMID(vmid & 0xf) |
1847  			     QUEUEID(queue & 0x7));
1848  	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1849  }
1850  
1851  /* ucode loading */
1852  /**
1853   * ci_mc_load_microcode - load MC ucode into the hw
1854   *
1855   * @rdev: radeon_device pointer
1856   *
1857   * Load the GDDR MC ucode into the hw (CIK).
1858   * Returns 0 on success, error on failure.
1859   */
1860  int ci_mc_load_microcode(struct radeon_device *rdev)
1861  {
1862  	const __be32 *fw_data = NULL;
1863  	const __le32 *new_fw_data = NULL;
1864  	u32 running, tmp;
1865  	u32 *io_mc_regs = NULL;
1866  	const __le32 *new_io_mc_regs = NULL;
1867  	int i, regs_size, ucode_size;
1868  
1869  	if (!rdev->mc_fw)
1870  		return -EINVAL;
1871  
1872  	if (rdev->new_fw) {
1873  		const struct mc_firmware_header_v1_0 *hdr =
1874  			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1875  
1876  		radeon_ucode_print_mc_hdr(&hdr->header);
1877  
1878  		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1879  		new_io_mc_regs = (const __le32 *)
1880  			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1881  		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1882  		new_fw_data = (const __le32 *)
1883  			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1884  	} else {
1885  		ucode_size = rdev->mc_fw->size / 4;
1886  
1887  		switch (rdev->family) {
1888  		case CHIP_BONAIRE:
1889  			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1890  			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1891  			break;
1892  		case CHIP_HAWAII:
1893  			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1894  			regs_size = HAWAII_IO_MC_REGS_SIZE;
1895  			break;
1896  		default:
1897  			return -EINVAL;
1898  		}
1899  		fw_data = (const __be32 *)rdev->mc_fw->data;
1900  	}
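	/*
	 * Two firmware layouts are handled above: new-style images carry a
	 * little-endian header describing the IO debug list and the ucode,
	 * while legacy images are raw big-endian words with the IO debug
	 * list supplied by the driver tables above.
	 */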
1901  
1902  	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1903  
1904  	if (running == 0) {
1905  		/* reset the engine and set to writable */
1906  		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1907  		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1908  
1909  		/* load mc io regs */
1910  		for (i = 0; i < regs_size; i++) {
1911  			if (rdev->new_fw) {
1912  				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1913  				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1914  			} else {
1915  				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1916  				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1917  			}
1918  		}
1919  
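		/*
		 * Board-specific quirk: override two IO debug entries on
		 * Bonaire 0x6649 parts whose MC_SEQ_MISC0 matches; the
		 * 0xff00 field presumably encodes the memory revision.
		 */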
1920  		tmp = RREG32(MC_SEQ_MISC0);
1921  		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1922  			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1923  			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1924  			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1925  			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1926  		}
1927  
1928  		/* load the MC ucode */
1929  		for (i = 0; i < ucode_size; i++) {
1930  			if (rdev->new_fw)
1931  				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1932  			else
1933  				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1934  		}
1935  
1936  		/* put the engine back into the active state */
1937  		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1938  		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1939  		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1940  
1941  		/* wait for training to complete */
1942  		for (i = 0; i < rdev->usec_timeout; i++) {
1943  			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1944  				break;
1945  			udelay(1);
1946  		}
1947  		for (i = 0; i < rdev->usec_timeout; i++) {
1948  			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1949  				break;
1950  			udelay(1);
1951  		}
1952  	}
1953  
1954  	return 0;
1955  }
1956  
1957  /**
1958   * cik_init_microcode - load ucode images from disk
1959   *
1960   * @rdev: radeon_device pointer
1961   *
1962   * Use the firmware interface to load the ucode images into
1963   * the driver (not loaded into hw).
1964   * Returns 0 on success, error on failure.
1965   */
1966  static int cik_init_microcode(struct radeon_device *rdev)
1967  {
1968  	const char *chip_name;
1969  	const char *new_chip_name;
1970  	size_t pfp_req_size, me_req_size, ce_req_size,
1971  		mec_req_size, rlc_req_size, mc_req_size = 0,
1972  		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1973  	char fw_name[30];
1974  	int new_fw = 0;
1975  	int err;
1976  	int num_fw;
1977  	bool new_smc = false;
1978  
1979  	DRM_DEBUG("\n");
1980  
1981  	switch (rdev->family) {
1982  	case CHIP_BONAIRE:
1983  		chip_name = "BONAIRE";
1984  		if ((rdev->pdev->revision == 0x80) ||
1985  		    (rdev->pdev->revision == 0x81) ||
1986  		    (rdev->pdev->device == 0x665f))
1987  			new_smc = true;
1988  		new_chip_name = "bonaire";
1989  		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990  		me_req_size = CIK_ME_UCODE_SIZE * 4;
1991  		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992  		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993  		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994  		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1995  		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1996  		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997  		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1998  		num_fw = 8;
1999  		break;
2000  	case CHIP_HAWAII:
2001  		chip_name = "HAWAII";
2002  		if (rdev->pdev->revision == 0x80)
2003  			new_smc = true;
2004  		new_chip_name = "hawaii";
2005  		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2006  		me_req_size = CIK_ME_UCODE_SIZE * 4;
2007  		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2008  		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2009  		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2010  		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2011  		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2012  		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2013  		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2014  		num_fw = 8;
2015  		break;
2016  	case CHIP_KAVERI:
2017  		chip_name = "KAVERI";
2018  		new_chip_name = "kaveri";
2019  		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2020  		me_req_size = CIK_ME_UCODE_SIZE * 4;
2021  		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2022  		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2023  		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2024  		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025  		num_fw = 7;
2026  		break;
2027  	case CHIP_KABINI:
2028  		chip_name = "KABINI";
2029  		new_chip_name = "kabini";
2030  		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2031  		me_req_size = CIK_ME_UCODE_SIZE * 4;
2032  		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2033  		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2034  		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2035  		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2036  		num_fw = 6;
2037  		break;
2038  	case CHIP_MULLINS:
2039  		chip_name = "MULLINS";
2040  		new_chip_name = "mullins";
2041  		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2042  		me_req_size = CIK_ME_UCODE_SIZE * 4;
2043  		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2044  		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2045  		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2046  		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2047  		num_fw = 6;
2048  		break;
2049  	default: BUG();
2050  	}
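	/*
	 * For each block, try the new lower-case firmware name first (those
	 * images carry headers that can be validated); fall back to the
	 * legacy upper-case name, where only the file size can be
	 * sanity-checked.
	 */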
2051  
2052  	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2053  
2054  	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2055  	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2056  	if (err) {
2057  		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2058  		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2059  		if (err)
2060  			goto out;
2061  		if (rdev->pfp_fw->size != pfp_req_size) {
2062  			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2063  			       rdev->pfp_fw->size, fw_name);
2064  			err = -EINVAL;
2065  			goto out;
2066  		}
2067  	} else {
2068  		err = radeon_ucode_validate(rdev->pfp_fw);
2069  		if (err) {
2070  			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2071  			       fw_name);
2072  			goto out;
2073  		} else {
2074  			new_fw++;
2075  		}
2076  	}
2077  
2078  	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2079  	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2080  	if (err) {
2081  		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2082  		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2083  		if (err)
2084  			goto out;
2085  		if (rdev->me_fw->size != me_req_size) {
2086  			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2087  			       rdev->me_fw->size, fw_name);
2088  			err = -EINVAL;
2089  		}
2090  	} else {
2091  		err = radeon_ucode_validate(rdev->me_fw);
2092  		if (err) {
2093  			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2094  			       fw_name);
2095  			goto out;
2096  		} else {
2097  			new_fw++;
2098  		}
2099  	}
2100  
2101  	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2102  	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2103  	if (err) {
2104  		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2105  		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2106  		if (err)
2107  			goto out;
2108  		if (rdev->ce_fw->size != ce_req_size) {
2109  			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2110  			       rdev->ce_fw->size, fw_name);
2111  			err = -EINVAL;
2112  		}
2113  	} else {
2114  		err = radeon_ucode_validate(rdev->ce_fw);
2115  		if (err) {
2116  			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2117  			       fw_name);
2118  			goto out;
2119  		} else {
2120  			new_fw++;
2121  		}
2122  	}
2123  
2124  	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2125  	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2126  	if (err) {
2127  		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2128  		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2129  		if (err)
2130  			goto out;
2131  		if (rdev->mec_fw->size != mec_req_size) {
2132  			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2133  			       rdev->mec_fw->size, fw_name);
2134  			err = -EINVAL;
2135  		}
2136  	} else {
2137  		err = radeon_ucode_validate(rdev->mec_fw);
2138  		if (err) {
2139  			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2140  			       fw_name);
2141  			goto out;
2142  		} else {
2143  			new_fw++;
2144  		}
2145  	}
2146  
2147  	if (rdev->family == CHIP_KAVERI) {
2148  		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2149  		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2150  		if (err) {
2151  			goto out;
2152  		} else {
2153  			err = radeon_ucode_validate(rdev->mec2_fw);
2154  			if (err) {
2155  				goto out;
2156  			} else {
2157  				new_fw++;
2158  			}
2159  		}
2160  	}
2161  
2162  	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2163  	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2164  	if (err) {
2165  		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2166  		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2167  		if (err)
2168  			goto out;
2169  		if (rdev->rlc_fw->size != rlc_req_size) {
2170  			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2171  			       rdev->rlc_fw->size, fw_name);
2172  			err = -EINVAL;
2173  		}
2174  	} else {
2175  		err = radeon_ucode_validate(rdev->rlc_fw);
2176  		if (err) {
2177  			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2178  			       fw_name);
2179  			goto out;
2180  		} else {
2181  			new_fw++;
2182  		}
2183  	}
2184  
2185  	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2186  	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2187  	if (err) {
2188  		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2189  		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2190  		if (err)
2191  			goto out;
2192  		if (rdev->sdma_fw->size != sdma_req_size) {
2193  			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2194  			       rdev->sdma_fw->size, fw_name);
2195  			err = -EINVAL;
2196  		}
2197  	} else {
2198  		err = radeon_ucode_validate(rdev->sdma_fw);
2199  		if (err) {
2200  			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2201  			       fw_name);
2202  			goto out;
2203  		} else {
2204  			new_fw++;
2205  		}
2206  	}
2207  
2208  	/* No SMC, MC ucode on APUs */
2209  	if (!(rdev->flags & RADEON_IS_IGP)) {
2210  		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2211  		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2212  		if (err) {
2213  			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2214  			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2215  			if (err) {
2216  				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2217  				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2218  				if (err)
2219  					goto out;
2220  			}
2221  			if ((rdev->mc_fw->size != mc_req_size) &&
2222  			    (rdev->mc_fw->size != mc2_req_size)) {
2223  				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2224  				       rdev->mc_fw->size, fw_name);
2225  				err = -EINVAL;
2226  			}
2227  			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2228  		} else {
2229  			err = radeon_ucode_validate(rdev->mc_fw);
2230  			if (err) {
2231  				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2232  				       fw_name);
2233  				goto out;
2234  			} else {
2235  				new_fw++;
2236  			}
2237  		}
2238  
2239  		if (new_smc)
2240  			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2241  		else
2242  			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2243  		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2244  		if (err) {
2245  			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2246  			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2247  			if (err) {
2248  				pr_err("smc: error loading firmware \"%s\"\n",
2249  				       fw_name);
2250  				release_firmware(rdev->smc_fw);
2251  				rdev->smc_fw = NULL;
2252  				err = 0;
2253  			} else if (rdev->smc_fw->size != smc_req_size) {
2254  				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2255  				       rdev->smc_fw->size, fw_name);
2256  				err = -EINVAL;
2257  			}
2258  		} else {
2259  			err = radeon_ucode_validate(rdev->smc_fw);
2260  			if (err) {
2261  				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2262  				       fw_name);
2263  				goto out;
2264  			} else {
2265  				new_fw++;
2266  			}
2267  		}
2268  	}
2269  
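	/*
	 * The ucode loaders parse either all new-style (headered) images or
	 * all legacy ones; a mix cannot be programmed consistently, so it
	 * is rejected here.
	 */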
2270  	if (new_fw == 0) {
2271  		rdev->new_fw = false;
2272  	} else if (new_fw < num_fw) {
2273  		pr_err("ci_fw: mixing new and old firmware!\n");
2274  		err = -EINVAL;
2275  	} else {
2276  		rdev->new_fw = true;
2277  	}
2278  
2279  out:
2280  	if (err) {
2281  		if (err != -EINVAL)
2282  			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2283  			       fw_name);
2284  		release_firmware(rdev->pfp_fw);
2285  		rdev->pfp_fw = NULL;
2286  		release_firmware(rdev->me_fw);
2287  		rdev->me_fw = NULL;
2288  		release_firmware(rdev->ce_fw);
2289  		rdev->ce_fw = NULL;
2290  		release_firmware(rdev->mec_fw);
2291  		rdev->mec_fw = NULL;
2292  		release_firmware(rdev->mec2_fw);
2293  		rdev->mec2_fw = NULL;
2294  		release_firmware(rdev->rlc_fw);
2295  		rdev->rlc_fw = NULL;
2296  		release_firmware(rdev->sdma_fw);
2297  		rdev->sdma_fw = NULL;
2298  		release_firmware(rdev->mc_fw);
2299  		rdev->mc_fw = NULL;
2300  		release_firmware(rdev->smc_fw);
2301  		rdev->smc_fw = NULL;
2302  	}
2303  	return err;
2304  }
2305  
2306  /*
2307   * Core functions
2308   */
2309  /**
2310   * cik_tiling_mode_table_init - init the hw tiling table
2311   *
2312   * @rdev: radeon_device pointer
2313   *
2314   * Starting with SI, the tiling setup is done globally in a
2315   * set of 32 tiling modes.  Rather than selecting each set of
2316   * parameters per surface as on older asics, we just select
2317   * which index in the tiling table we want to use, and the
2318   * surface uses those parameters (CIK).
2319   */
2320  static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2321  {
2322  	u32 *tile = rdev->config.cik.tile_mode_array;
2323  	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2324  	const u32 num_tile_mode_states =
2325  			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2326  	const u32 num_secondary_tile_mode_states =
2327  			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2328  	u32 reg_offset, split_equal_to_row_size;
2329  	u32 num_pipe_configs;
2330  	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2331  		rdev->config.cik.max_shader_engines;
2332  
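	/*
	 * Map the DRAM row size to the matching TILE_SPLIT value so that
	 * depth surfaces split on row-size boundaries.
	 */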
2333  	switch (rdev->config.cik.mem_row_size_in_kb) {
2334  	case 1:
2335  		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2336  		break;
2337  	case 2:
2338  	default:
2339  		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2340  		break;
2341  	case 4:
2342  		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2343  		break;
2344  	}
2345  
2346  	num_pipe_configs = rdev->config.cik.max_tile_pipes;
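	/*
	 * Tables below exist for 2, 4, 8 and 16 pipe configs; Hawaii
	 * reports more than 8 tile pipes and is programmed with the P16
	 * variants.
	 */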
2347  	if (num_pipe_configs > 8)
2348  		num_pipe_configs = 16;
2349  
2350  	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2351  		tile[reg_offset] = 0;
2352  	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2353  		macrotile[reg_offset] = 0;
2354  
2355  	switch (num_pipe_configs) {
2356  	case 16:
2357  		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359  			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2361  		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363  			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2365  		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2367  			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2369  		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371  			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2373  		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375  			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376  			   TILE_SPLIT(split_equal_to_row_size));
2377  		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2378  			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2380  		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2381  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382  			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2384  		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386  			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387  			   TILE_SPLIT(split_equal_to_row_size));
2388  		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2389  			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2390  		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391  			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2393  		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2395  			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397  		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2399  			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2400  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401  		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403  			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405  		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406  			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2408  		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410  			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412  		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414  			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2415  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416  		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418  			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420  		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421  			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2423  		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425  			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427  		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429  			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2430  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431  		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2432  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433  			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435  
2436  		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437  			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438  			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439  			   NUM_BANKS(ADDR_SURF_16_BANK));
2440  		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441  			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2442  			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443  			   NUM_BANKS(ADDR_SURF_16_BANK));
2444  		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445  			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446  			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447  			   NUM_BANKS(ADDR_SURF_16_BANK));
2448  		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449  			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450  			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451  			   NUM_BANKS(ADDR_SURF_16_BANK));
2452  		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453  			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454  			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455  			   NUM_BANKS(ADDR_SURF_8_BANK));
2456  		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457  			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458  			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459  			   NUM_BANKS(ADDR_SURF_4_BANK));
2460  		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461  			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462  			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463  			   NUM_BANKS(ADDR_SURF_2_BANK));
2464  		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465  			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466  			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467  			   NUM_BANKS(ADDR_SURF_16_BANK));
2468  		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469  			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470  			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471  			   NUM_BANKS(ADDR_SURF_16_BANK));
2472  		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473  			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474  			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475  			    NUM_BANKS(ADDR_SURF_16_BANK));
2476  		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477  			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478  			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479  			    NUM_BANKS(ADDR_SURF_8_BANK));
2480  		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481  			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482  			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483  			    NUM_BANKS(ADDR_SURF_4_BANK));
2484  		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485  			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486  			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487  			    NUM_BANKS(ADDR_SURF_2_BANK));
2488  		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489  			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490  			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491  			    NUM_BANKS(ADDR_SURF_2_BANK));
2492  
2493  		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2494  			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2495  		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2496  			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2497  		break;
2498  
2499  	case 8:
2500  		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2502  			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2504  		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2506  			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2508  		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2510  			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2512  		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514  			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2516  		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518  			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519  			   TILE_SPLIT(split_equal_to_row_size));
2520  		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521  			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523  		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2524  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525  			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2527  		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2528  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2529  			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530  			   TILE_SPLIT(split_equal_to_row_size));
2531  		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2532  			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2533  		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534  			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2536  		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2538  			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540  		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2542  			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2543  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544  		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2545  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2546  			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548  		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549  			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2551  		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553  			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2555  		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557  			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559  		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561  			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563  		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564  			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2566  		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2568  			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570  		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2572  			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574  		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2576  			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2578  
2579  		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2582  				NUM_BANKS(ADDR_SURF_16_BANK));
2583  		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586  				NUM_BANKS(ADDR_SURF_16_BANK));
2587  		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590  				NUM_BANKS(ADDR_SURF_16_BANK));
2591  		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2594  				NUM_BANKS(ADDR_SURF_16_BANK));
2595  		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2597  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2598  				NUM_BANKS(ADDR_SURF_8_BANK));
2599  		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2602  				NUM_BANKS(ADDR_SURF_4_BANK));
2603  		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2606  				NUM_BANKS(ADDR_SURF_2_BANK));
2607  		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2609  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610  				NUM_BANKS(ADDR_SURF_16_BANK));
2611  		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2614  				NUM_BANKS(ADDR_SURF_16_BANK));
2615  		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2617  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618  				NUM_BANKS(ADDR_SURF_16_BANK));
2619  		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622  				NUM_BANKS(ADDR_SURF_16_BANK));
2623  		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626  				NUM_BANKS(ADDR_SURF_8_BANK));
2627  		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630  				NUM_BANKS(ADDR_SURF_4_BANK));
2631  		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634  				NUM_BANKS(ADDR_SURF_2_BANK));
2635  
2636  		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2637  			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2638  		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2639  			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2640  		break;
2641  
2642  	case 4:
2643  		if (num_rbs == 4) {
2644  		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2646  			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2648  		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2650  			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2652  		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2654  			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2656  		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658  			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2660  		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662  			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663  			   TILE_SPLIT(split_equal_to_row_size));
2664  		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2665  			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667  		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2668  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2669  			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2671  		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2672  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673  			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674  			   TILE_SPLIT(split_equal_to_row_size));
2675  		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2676  			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2677  		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678  			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2680  		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2682  			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684  		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2686  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688  		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690  			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692  		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693  			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2695  		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2697  			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699  		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2702  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703  		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2704  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705  			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707  		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2708  			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2710  		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2711  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712  			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714  		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2715  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718  		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2719  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720  			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722  
2723  		} else if (num_rbs < 4) {
2724  		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2726  			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2727  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2728  		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2729  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2730  			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2731  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2732  		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2734  			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2736  		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738  			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2740  		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742  			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743  			   TILE_SPLIT(split_equal_to_row_size));
2744  		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745  			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2746  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2747  		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2749  			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2751  		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2752  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2753  			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754  			   TILE_SPLIT(split_equal_to_row_size));
2755  		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756  			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2757  		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758  			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2760  		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2762  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2764  		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2766  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2768  		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2769  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2770  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2772  		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2775  		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2777  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779  		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2781  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783  		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2784  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2785  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2787  		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2788  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2789  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2790  		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2791  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2792  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794  		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2795  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2796  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798  		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2800  			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2802  		}
2803  
2804  		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2806  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2807  				NUM_BANKS(ADDR_SURF_16_BANK));
2808  		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811  				NUM_BANKS(ADDR_SURF_16_BANK));
2812  		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815  				NUM_BANKS(ADDR_SURF_16_BANK));
2816  		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2818  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2819  				NUM_BANKS(ADDR_SURF_16_BANK));
2820  		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2823  				NUM_BANKS(ADDR_SURF_16_BANK));
2824  		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827  				NUM_BANKS(ADDR_SURF_8_BANK));
2828  		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2831  				NUM_BANKS(ADDR_SURF_4_BANK));
2832  		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2834  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835  				NUM_BANKS(ADDR_SURF_16_BANK));
2836  		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2837  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2838  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839  				NUM_BANKS(ADDR_SURF_16_BANK));
2840  		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843  				NUM_BANKS(ADDR_SURF_16_BANK));
2844  		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847  				NUM_BANKS(ADDR_SURF_16_BANK));
2848  		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851  				NUM_BANKS(ADDR_SURF_16_BANK));
2852  		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855  				NUM_BANKS(ADDR_SURF_8_BANK));
2856  		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859  				NUM_BANKS(ADDR_SURF_4_BANK));
2860  
2861  		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2862  			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2863  		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2864  			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2865  		break;
2866  
2867  	case 2:
2868  		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2870  			   PIPE_CONFIG(ADDR_SURF_P2) |
2871  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2872  		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2874  			   PIPE_CONFIG(ADDR_SURF_P2) |
2875  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2876  		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2878  			   PIPE_CONFIG(ADDR_SURF_P2) |
2879  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2880  		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882  			   PIPE_CONFIG(ADDR_SURF_P2) |
2883  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2884  		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886  			   PIPE_CONFIG(ADDR_SURF_P2) |
2887  			   TILE_SPLIT(split_equal_to_row_size));
2888  		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2889  			   PIPE_CONFIG(ADDR_SURF_P2) |
2890  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891  		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2892  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893  			   PIPE_CONFIG(ADDR_SURF_P2) |
2894  			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895  		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2896  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897  			   PIPE_CONFIG(ADDR_SURF_P2) |
2898  			   TILE_SPLIT(split_equal_to_row_size));
2899  		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2900  			   PIPE_CONFIG(ADDR_SURF_P2));
2901  		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902  			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903  			   PIPE_CONFIG(ADDR_SURF_P2));
2904  		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2906  			    PIPE_CONFIG(ADDR_SURF_P2) |
2907  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908  		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2909  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2910  			    PIPE_CONFIG(ADDR_SURF_P2) |
2911  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912  		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913  			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2914  			    PIPE_CONFIG(ADDR_SURF_P2) |
2915  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916  		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917  			    PIPE_CONFIG(ADDR_SURF_P2) |
2918  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2919  		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2921  			    PIPE_CONFIG(ADDR_SURF_P2) |
2922  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923  		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925  			    PIPE_CONFIG(ADDR_SURF_P2) |
2926  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927  		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928  			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2929  			    PIPE_CONFIG(ADDR_SURF_P2) |
2930  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931  		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2932  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2933  			    PIPE_CONFIG(ADDR_SURF_P2));
2934  		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2936  			    PIPE_CONFIG(ADDR_SURF_P2) |
2937  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938  		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2940  			    PIPE_CONFIG(ADDR_SURF_P2) |
2941  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942  		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2943  			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2944  			    PIPE_CONFIG(ADDR_SURF_P2) |
2945  			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2946  
2947  		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2948  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2949  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950  				NUM_BANKS(ADDR_SURF_16_BANK));
2951  		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2953  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954  				NUM_BANKS(ADDR_SURF_16_BANK));
2955  		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958  				NUM_BANKS(ADDR_SURF_16_BANK));
2959  		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2961  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962  				NUM_BANKS(ADDR_SURF_16_BANK));
2963  		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2965  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966  				NUM_BANKS(ADDR_SURF_16_BANK));
2967  		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2969  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970  				NUM_BANKS(ADDR_SURF_16_BANK));
2971  		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974  				NUM_BANKS(ADDR_SURF_8_BANK));
2975  		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2976  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2977  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978  				NUM_BANKS(ADDR_SURF_16_BANK));
2979  		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2980  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2981  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982  				NUM_BANKS(ADDR_SURF_16_BANK));
2983  		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2984  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986  				NUM_BANKS(ADDR_SURF_16_BANK));
2987  		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2988  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2989  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990  				NUM_BANKS(ADDR_SURF_16_BANK));
2991  		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2993  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994  				NUM_BANKS(ADDR_SURF_16_BANK));
2995  		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998  				NUM_BANKS(ADDR_SURF_16_BANK));
2999  		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000  				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001  				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002  				NUM_BANKS(ADDR_SURF_8_BANK));
3003  
3004  		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3005  			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3006  		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3007  			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3008  		break;
3009  
3010  	default:
3011  		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3012  	}
3013  }
3014  
3015  /**
3016   * cik_select_se_sh - select which SE, SH to address
3017   *
3018   * @rdev: radeon_device pointer
3019   * @se_num: shader engine to address
3020   * @sh_num: sh block to address
3021   *
3022   * Select which SE, SH combinations to address. Certain
3023   * registers are instanced per SE or SH.  0xffffffff means
3024   * broadcast to all SEs or SHs (CIK).
3025   */
3026  static void cik_select_se_sh(struct radeon_device *rdev,
3027  			     u32 se_num, u32 sh_num)
3028  {
3029  	u32 data = INSTANCE_BROADCAST_WRITES;
3030  
3031  	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3032  		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3033  	else if (se_num == 0xffffffff)
3034  		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3035  	else if (sh_num == 0xffffffff)
3036  		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3037  	else
3038  		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3039  	WREG32(GRBM_GFX_INDEX, data);
3040  }
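      /*
       * Illustrative usage (hypothetical values): cik_select_se_sh(rdev, 1, 0)
       * routes subsequent instanced register accesses to SE1/SH0, while
       * cik_select_se_sh(rdev, 0xffffffff, 0xffffffff) restores broadcast
       * to all SEs/SHs, as the callers below do after their per-SE loops.
       */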
3041  
3042  /**
3043   * cik_create_bitmask - create a bitmask
3044   *
3045   * @bit_width: length of the mask
3046   *
3047   * create a variable length bit mask (CIK).
3048   * Returns the bitmask.
3049   */
3050  static u32 cik_create_bitmask(u32 bit_width)
3051  {
3052  	u32 i, mask = 0;
3053  
3054  	for (i = 0; i < bit_width; i++) {
3055  		mask <<= 1;
3056  		mask |= 1;
3057  	}
3058  	return mask;
3059  }
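      /*
       * Worked example: cik_create_bitmask(4) iterates four times and
       * returns 0b1111 (0xf).  For a non-zero width this is equivalent to
       * GENMASK(bit_width - 1, 0) from <linux/bits.h>; the loop form also
       * returns 0 for a zero width.
       */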
3060  
3061  /**
3062   * cik_get_rb_disabled - computes the mask of disabled RBs
3063   *
3064   * @rdev: radeon_device pointer
3065   * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
3066   * @sh_per_se: number of SH blocks per SE for the asic
3067   *
3068   * Calculates the bitmask of disabled RBs (CIK).
3069   * Returns the disabled RB bitmask.
3070   */
3071  static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3072  			      u32 max_rb_num_per_se,
3073  			      u32 sh_per_se)
3074  {
3075  	u32 data, mask;
3076  
3077  	data = RREG32(CC_RB_BACKEND_DISABLE);
3078  	if (data & 1)
3079  		data &= BACKEND_DISABLE_MASK;
3080  	else
3081  		data = 0;
3082  	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3083  
3084  	data >>= BACKEND_DISABLE_SHIFT;
3085  
3086  	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3087  
3088  	return data & mask;
3089  }
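      /*
       * Note: the returned mask combines the factory-fused disable bits
       * from CC_RB_BACKEND_DISABLE (honored only when bit 0 is set) with
       * the driver/user requested disables from GC_USER_RB_BACKEND_DISABLE,
       * trimmed down to the RBs that belong to a single SH.
       */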
3090  
3091  /**
3092   * cik_setup_rb - setup the RBs on the asic
3093   *
3094   * @rdev: radeon_device pointer
3095   * @se_num: number of SEs (shader engines) for the asic
3096   * @sh_per_se: number of SH blocks per SE for the asic
3097   * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3098   *
3099   * Configures per-SE/SH RB registers (CIK).
3100   */
3101  static void cik_setup_rb(struct radeon_device *rdev,
3102  			 u32 se_num, u32 sh_per_se,
3103  			 u32 max_rb_num_per_se)
3104  {
3105  	int i, j;
3106  	u32 data, mask;
3107  	u32 disabled_rbs = 0;
3108  	u32 enabled_rbs = 0;
3109  
3110  	for (i = 0; i < se_num; i++) {
3111  		for (j = 0; j < sh_per_se; j++) {
3112  			cik_select_se_sh(rdev, i, j);
3113  			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3114  			if (rdev->family == CHIP_HAWAII)
3115  				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3116  			else
3117  				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3118  		}
3119  	}
3120  	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3121  
3122  	mask = 1;
3123  	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3124  		if (!(disabled_rbs & mask))
3125  			enabled_rbs |= mask;
3126  		mask <<= 1;
3127  	}
3128  
3129  	rdev->config.cik.backend_enable_mask = enabled_rbs;
3130  
3131  	for (i = 0; i < se_num; i++) {
3132  		cik_select_se_sh(rdev, i, 0xffffffff);
3133  		data = 0;
3134  		for (j = 0; j < sh_per_se; j++) {
3135  			switch (enabled_rbs & 3) {
3136  			case 0:
3137  				if (j == 0)
3138  					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3139  				else
3140  					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3141  				break;
3142  			case 1:
3143  				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3144  				break;
3145  			case 2:
3146  				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3147  				break;
3148  			case 3:
3149  			default:
3150  				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3151  				break;
3152  			}
3153  			enabled_rbs >>= 2;
3154  		}
3155  		WREG32(PA_SC_RASTER_CONFIG, data);
3156  	}
3157  	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3158  }
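      /*
       * Worked example (hypothetical fuse data): with se_num = 2,
       * sh_per_se = 1 and max_rb_num_per_se = 2, a part with RB1 fused off
       * reports disabled_rbs = 0b0010, so the complement loop above yields
       * enabled_rbs = 0b1101 and backend_enable_mask = 0xd.
       */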
3159  
3160  /**
3161   * cik_gpu_init - setup the 3D engine
3162   *
3163   * @rdev: radeon_device pointer
3164   *
3165   * Configures the 3D engine and tiling configuration
3166   * registers so that the 3D engine is usable.
3167   */
3168  static void cik_gpu_init(struct radeon_device *rdev)
3169  {
3170  	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3171  	u32 mc_arb_ramcfg;
3172  	u32 hdp_host_path_cntl;
3173  	u32 tmp;
3174  	int i, j;
3175  
3176  	switch (rdev->family) {
3177  	case CHIP_BONAIRE:
3178  		rdev->config.cik.max_shader_engines = 2;
3179  		rdev->config.cik.max_tile_pipes = 4;
3180  		rdev->config.cik.max_cu_per_sh = 7;
3181  		rdev->config.cik.max_sh_per_se = 1;
3182  		rdev->config.cik.max_backends_per_se = 2;
3183  		rdev->config.cik.max_texture_channel_caches = 4;
3184  		rdev->config.cik.max_gprs = 256;
3185  		rdev->config.cik.max_gs_threads = 32;
3186  		rdev->config.cik.max_hw_contexts = 8;
3187  
3188  		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3189  		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3190  		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3191  		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3192  		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3193  		break;
3194  	case CHIP_HAWAII:
3195  		rdev->config.cik.max_shader_engines = 4;
3196  		rdev->config.cik.max_tile_pipes = 16;
3197  		rdev->config.cik.max_cu_per_sh = 11;
3198  		rdev->config.cik.max_sh_per_se = 1;
3199  		rdev->config.cik.max_backends_per_se = 4;
3200  		rdev->config.cik.max_texture_channel_caches = 16;
3201  		rdev->config.cik.max_gprs = 256;
3202  		rdev->config.cik.max_gs_threads = 32;
3203  		rdev->config.cik.max_hw_contexts = 8;
3204  
3205  		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3206  		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3207  		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3208  		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3209  		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3210  		break;
3211  	case CHIP_KAVERI:
3212  		rdev->config.cik.max_shader_engines = 1;
3213  		rdev->config.cik.max_tile_pipes = 4;
3214  		rdev->config.cik.max_cu_per_sh = 8;
3215  		rdev->config.cik.max_backends_per_se = 2;
3216  		rdev->config.cik.max_sh_per_se = 1;
3217  		rdev->config.cik.max_texture_channel_caches = 4;
3218  		rdev->config.cik.max_gprs = 256;
3219  		rdev->config.cik.max_gs_threads = 16;
3220  		rdev->config.cik.max_hw_contexts = 8;
3221  
3222  		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3223  		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3224  		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3225  		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3226  		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3227  		break;
3228  	case CHIP_KABINI:
3229  	case CHIP_MULLINS:
3230  	default:
3231  		rdev->config.cik.max_shader_engines = 1;
3232  		rdev->config.cik.max_tile_pipes = 2;
3233  		rdev->config.cik.max_cu_per_sh = 2;
3234  		rdev->config.cik.max_sh_per_se = 1;
3235  		rdev->config.cik.max_backends_per_se = 1;
3236  		rdev->config.cik.max_texture_channel_caches = 2;
3237  		rdev->config.cik.max_gprs = 256;
3238  		rdev->config.cik.max_gs_threads = 16;
3239  		rdev->config.cik.max_hw_contexts = 8;
3240  
3241  		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3242  		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3243  		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3244  		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3245  		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3246  		break;
3247  	}
3248  
3249  	/* Initialize HDP */
3250  	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3251  		WREG32((0x2c14 + j), 0x00000000);
3252  		WREG32((0x2c18 + j), 0x00000000);
3253  		WREG32((0x2c1c + j), 0x00000000);
3254  		WREG32((0x2c20 + j), 0x00000000);
3255  		WREG32((0x2c24 + j), 0x00000000);
3256  	}
3257  
3258  	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3259  	WREG32(SRBM_INT_CNTL, 0x1);
3260  	WREG32(SRBM_INT_ACK, 0x1);
3261  
3262  	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3263  
3264  	RREG32(MC_SHARED_CHMAP);
3265  	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3266  
3267  	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3268  	rdev->config.cik.mem_max_burst_length_bytes = 256;
3269  	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3270  	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3271  	if (rdev->config.cik.mem_row_size_in_kb > 4)
3272  		rdev->config.cik.mem_row_size_in_kb = 4;
3273  	/* XXX use MC settings? */
3274  	rdev->config.cik.shader_engine_tile_size = 32;
3275  	rdev->config.cik.num_gpus = 1;
3276  	rdev->config.cik.multi_gpu_tile_size = 64;
3277  
3278  	/* fix up row size */
3279  	gb_addr_config &= ~ROW_SIZE_MASK;
3280  	switch (rdev->config.cik.mem_row_size_in_kb) {
3281  	case 1:
3282  	default:
3283  		gb_addr_config |= ROW_SIZE(0);
3284  		break;
3285  	case 2:
3286  		gb_addr_config |= ROW_SIZE(1);
3287  		break;
3288  	case 4:
3289  		gb_addr_config |= ROW_SIZE(2);
3290  		break;
3291  	}
3292  
3293  	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3294  	 * not have bank info, so create a custom tiling dword.
3295  	 * bits 3:0   num_pipes
3296  	 * bits 7:4   num_banks
3297  	 * bits 11:8  group_size
3298  	 * bits 15:12 row_size
3299  	 */
3300  	rdev->config.cik.tile_config = 0;
3301  	switch (rdev->config.cik.num_tile_pipes) {
3302  	case 1:
3303  		rdev->config.cik.tile_config |= (0 << 0);
3304  		break;
3305  	case 2:
3306  		rdev->config.cik.tile_config |= (1 << 0);
3307  		break;
3308  	case 4:
3309  		rdev->config.cik.tile_config |= (2 << 0);
3310  		break;
3311  	case 8:
3312  	default:
3313  		/* XXX what about 12? */
3314  		rdev->config.cik.tile_config |= (3 << 0);
3315  		break;
3316  	}
3317  	rdev->config.cik.tile_config |=
3318  		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3319  	rdev->config.cik.tile_config |=
3320  		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3321  	rdev->config.cik.tile_config |=
3322  		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
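      	/*
      	 * Worked example (hypothetical readback): with 4 tile pipes the
      	 * switch above puts 2 in bits 3:0; the bank count, pipe interleave
      	 * size and row size fields are then copied through unchanged from
      	 * MC_ARB_RAMCFG and GB_ADDR_CONFIG into bits 7:4, 11:8 and 15:12,
      	 * so the full tiling setup can be recovered from this one dword.
      	 */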
3323  
3324  	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3325  	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3326  	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3327  	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3328  	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3329  	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3330  	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3331  	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3332  
3333  	cik_tiling_mode_table_init(rdev);
3334  
3335  	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3336  		     rdev->config.cik.max_sh_per_se,
3337  		     rdev->config.cik.max_backends_per_se);
3338  
3339  	rdev->config.cik.active_cus = 0;
3340  	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3341  		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3342  			rdev->config.cik.active_cus +=
3343  				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3344  		}
3345  	}
3346  
3347  	/* set HW defaults for 3D engine */
3348  	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3349  
3350  	WREG32(SX_DEBUG_1, 0x20);
3351  
3352  	WREG32(TA_CNTL_AUX, 0x00010000);
3353  
3354  	tmp = RREG32(SPI_CONFIG_CNTL);
3355  	tmp |= 0x03000000;
3356  	WREG32(SPI_CONFIG_CNTL, tmp);
3357  
3358  	WREG32(SQ_CONFIG, 1);
3359  
3360  	WREG32(DB_DEBUG, 0);
3361  
3362  	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3363  	tmp |= 0x00000400;
3364  	WREG32(DB_DEBUG2, tmp);
3365  
3366  	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3367  	tmp |= 0x00020200;
3368  	WREG32(DB_DEBUG3, tmp);
3369  
3370  	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3371  	tmp |= 0x00018208;
3372  	WREG32(CB_HW_CONTROL, tmp);
3373  
3374  	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3375  
3376  	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3377  				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3378  				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3379  				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3380  
3381  	WREG32(VGT_NUM_INSTANCES, 1);
3382  
3383  	WREG32(CP_PERFMON_CNTL, 0);
3384  
3385  	WREG32(SQ_CONFIG, 0);
3386  
3387  	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3388  					  FORCE_EOV_MAX_REZ_CNT(255)));
3389  
3390  	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3391  	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3392  
3393  	WREG32(VGT_GS_VERTEX_REUSE, 16);
3394  	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3395  
3396  	tmp = RREG32(HDP_MISC_CNTL);
3397  	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3398  	WREG32(HDP_MISC_CNTL, tmp);
3399  
3400  	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3401  	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3402  
3403  	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3404  	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3405  
3406  	udelay(50);
3407  }
3408  
3409  /*
3410   * GPU scratch register helper functions.
3411   */
3412  /**
3413   * cik_scratch_init - setup driver info for CP scratch regs
3414   *
3415   * @rdev: radeon_device pointer
3416   *
3417   * Set up the number and offset of the CP scratch registers.
3418   * NOTE: use of CP scratch registers is a legacy interface and
3419   * is not used by default on newer asics (r6xx+).  On newer asics,
3420   * memory buffers are used for fences rather than scratch regs.
3421   */
3422  static void cik_scratch_init(struct radeon_device *rdev)
3423  {
3424  	int i;
3425  
3426  	rdev->scratch.num_reg = 7;
3427  	rdev->scratch.reg_base = SCRATCH_REG0;
3428  	for (i = 0; i < rdev->scratch.num_reg; i++) {
3429  		rdev->scratch.free[i] = true;
3430  		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3431  	}
3432  }
3433  
3434  /**
3435   * cik_ring_test - basic gfx ring test
3436   *
3437   * @rdev: radeon_device pointer
3438   * @ring: radeon_ring structure holding ring information
3439   *
3440   * Allocate a scratch register and write to it using the gfx ring (CIK).
3441   * Provides a basic gfx ring test to verify that the ring is working.
3442   * Used by cik_cp_gfx_resume().
3443   * Returns 0 on success, error on failure.
3444   */
3445  int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3446  {
3447  	uint32_t scratch;
3448  	uint32_t tmp = 0;
3449  	unsigned i;
3450  	int r;
3451  
3452  	r = radeon_scratch_get(rdev, &scratch);
3453  	if (r) {
3454  		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3455  		return r;
3456  	}
3457  	WREG32(scratch, 0xCAFEDEAD);
3458  	r = radeon_ring_lock(rdev, ring, 3);
3459  	if (r) {
3460  		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3461  		radeon_scratch_free(rdev, scratch);
3462  		return r;
3463  	}
3464  	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3465  	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3466  	radeon_ring_write(ring, 0xDEADBEEF);
3467  	radeon_ring_unlock_commit(rdev, ring, false);
3468  
3469  	for (i = 0; i < rdev->usec_timeout; i++) {
3470  		tmp = RREG32(scratch);
3471  		if (tmp == 0xDEADBEEF)
3472  			break;
3473  		udelay(1);
3474  	}
3475  	if (i < rdev->usec_timeout) {
3476  		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3477  	} else {
3478  		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3479  			  ring->idx, scratch, tmp);
3480  		r = -EINVAL;
3481  	}
3482  	radeon_scratch_free(rdev, scratch);
3483  	return r;
3484  }
3485  
3486  /**
3487   * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3488   *
3489   * @rdev: radeon_device pointer
3490   * @ridx: radeon ring index
3491   *
3492   * Emits an hdp flush on the cp.
3493   */
3494  static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3495  				       int ridx)
3496  {
3497  	struct radeon_ring *ring = &rdev->ring[ridx];
3498  	u32 ref_and_mask;
3499  
3500  	switch (ring->idx) {
3501  	case CAYMAN_RING_TYPE_CP1_INDEX:
3502  	case CAYMAN_RING_TYPE_CP2_INDEX:
3503  	default:
3504  		switch (ring->me) {
3505  		case 0:
3506  			ref_and_mask = CP2 << ring->pipe;
3507  			break;
3508  		case 1:
3509  			ref_and_mask = CP6 << ring->pipe;
3510  			break;
3511  		default:
3512  			return;
3513  		}
3514  		break;
3515  	case RADEON_RING_TYPE_GFX_INDEX:
3516  		ref_and_mask = CP0;
3517  		break;
3518  	}
3519  
3520  	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3521  	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3522  				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3523  				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3524  	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3525  	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3526  	radeon_ring_write(ring, ref_and_mask);
3527  	radeon_ring_write(ring, ref_and_mask);
3528  	radeon_ring_write(ring, 0x20); /* poll interval */
3529  }
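      /*
       * The WAIT_REG_MEM above is a write-then-wait: the CP writes
       * ref_and_mask to GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE
       * at the 0x20 poll interval until the masked value equals
       * ref_and_mask, i.e. until the requested HDP flush has completed.
       */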
3530  
3531  /**
3532   * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3533   *
3534   * @rdev: radeon_device pointer
3535   * @fence: radeon fence object
3536   *
3537   * Emits a fence sequence number on the gfx ring and flushes
3538   * GPU caches.
3539   */
3540  void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3541  			     struct radeon_fence *fence)
3542  {
3543  	struct radeon_ring *ring = &rdev->ring[fence->ring];
3544  	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3545  
3546  	/* Workaround for cache flush problems. First send a dummy EOP
3547  	 * event down the pipe with a seq number one below the real one.
3548  	 */
3549  	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3550  	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3551  				 EOP_TC_ACTION_EN |
3552  				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3553  				 EVENT_INDEX(5)));
3554  	radeon_ring_write(ring, addr & 0xfffffffc);
3555  	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3556  				DATA_SEL(1) | INT_SEL(0));
3557  	radeon_ring_write(ring, fence->seq - 1);
3558  	radeon_ring_write(ring, 0);
3559  
3560  	/* Then send the real EOP event down the pipe. */
3561  	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3562  	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3563  				 EOP_TC_ACTION_EN |
3564  				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3565  				 EVENT_INDEX(5)));
3566  	radeon_ring_write(ring, addr & 0xfffffffc);
3567  	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3568  	radeon_ring_write(ring, fence->seq);
3569  	radeon_ring_write(ring, 0);
3570  }
3571  
3572  /**
3573   * cik_fence_compute_ring_emit - emit a fence on the compute ring
3574   *
3575   * @rdev: radeon_device pointer
3576   * @fence: radeon fence object
3577   *
3578   * Emits a fence sequence number on the compute ring and flushes
3579   * GPU caches.
3580   */
3581  void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3582  				 struct radeon_fence *fence)
3583  {
3584  	struct radeon_ring *ring = &rdev->ring[fence->ring];
3585  	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3586  
3587  	/* RELEASE_MEM - flush caches, send int */
3588  	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3589  	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3590  				 EOP_TC_ACTION_EN |
3591  				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3592  				 EVENT_INDEX(5)));
3593  	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3594  	radeon_ring_write(ring, addr & 0xfffffffc);
3595  	radeon_ring_write(ring, upper_32_bits(addr));
3596  	radeon_ring_write(ring, fence->seq);
3597  	radeon_ring_write(ring, 0);
3598  }
3599  
3600  /**
3601   * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3602   *
3603   * @rdev: radeon_device pointer
3604   * @ring: radeon ring buffer object
3605   * @semaphore: radeon semaphore object
3606   * @emit_wait: Is this a semaphore wait?
3607   *
3608   * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3609   * from running ahead of semaphore waits.
3610   */
3611  bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3612  			     struct radeon_ring *ring,
3613  			     struct radeon_semaphore *semaphore,
3614  			     bool emit_wait)
3615  {
3616  	uint64_t addr = semaphore->gpu_addr;
3617  	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3618  
3619  	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3620  	radeon_ring_write(ring, lower_32_bits(addr));
3621  	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3622  
3623  	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3624  		/* Prevent the PFP from running ahead of the semaphore wait */
3625  		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3626  		radeon_ring_write(ring, 0x0);
3627  	}
3628  
3629  	return true;
3630  }
3631  
3632  /**
3633   * cik_copy_cpdma - copy pages using the CP DMA engine
3634   *
3635   * @rdev: radeon_device pointer
3636   * @src_offset: src GPU address
3637   * @dst_offset: dst GPU address
3638   * @num_gpu_pages: number of GPU pages to xfer
3639   * @resv: reservation object to sync to
3640   *
3641   * Copies GPU pages using the CP DMA engine (CIK+).
3642   * Used by the radeon ttm implementation to move pages if
3643   * registered as the asic copy callback.
3644   */
3645  struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3646  				    uint64_t src_offset, uint64_t dst_offset,
3647  				    unsigned num_gpu_pages,
3648  				    struct dma_resv *resv)
3649  {
3650  	struct radeon_fence *fence;
3651  	struct radeon_sync sync;
3652  	int ring_index = rdev->asic->copy.blit_ring_index;
3653  	struct radeon_ring *ring = &rdev->ring[ring_index];
3654  	u32 size_in_bytes, cur_size_in_bytes, control;
3655  	int i, num_loops;
3656  	int r = 0;
3657  
3658  	radeon_sync_create(&sync);
3659  
3660  	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3661  	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3662  	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3663  	if (r) {
3664  		DRM_ERROR("radeon: moving bo (%d).\n", r);
3665  		radeon_sync_free(rdev, &sync, NULL);
3666  		return ERR_PTR(r);
3667  	}
3668  
3669  	radeon_sync_resv(rdev, &sync, resv, false);
3670  	radeon_sync_rings(rdev, &sync, ring->idx);
3671  
3672  	for (i = 0; i < num_loops; i++) {
3673  		cur_size_in_bytes = size_in_bytes;
3674  		if (cur_size_in_bytes > 0x1fffff)
3675  			cur_size_in_bytes = 0x1fffff;
3676  		size_in_bytes -= cur_size_in_bytes;
3677  		control = 0;
3678  		if (size_in_bytes == 0)
3679  			control |= PACKET3_DMA_DATA_CP_SYNC;
3680  		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3681  		radeon_ring_write(ring, control);
3682  		radeon_ring_write(ring, lower_32_bits(src_offset));
3683  		radeon_ring_write(ring, upper_32_bits(src_offset));
3684  		radeon_ring_write(ring, lower_32_bits(dst_offset));
3685  		radeon_ring_write(ring, upper_32_bits(dst_offset));
3686  		radeon_ring_write(ring, cur_size_in_bytes);
3687  		src_offset += cur_size_in_bytes;
3688  		dst_offset += cur_size_in_bytes;
3689  	}
3690  
3691  	r = radeon_fence_emit(rdev, &fence, ring->idx);
3692  	if (r) {
3693  		radeon_ring_unlock_undo(rdev, ring);
3694  		radeon_sync_free(rdev, &sync, NULL);
3695  		return ERR_PTR(r);
3696  	}
3697  
3698  	radeon_ring_unlock_commit(rdev, ring, false);
3699  	radeon_sync_free(rdev, &sync, fence);
3700  
3701  	return fence;
3702  }
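      /*
       * Sizing note: each DMA_DATA packet moves at most 0x1fffff bytes, so
       * a 16 MiB buffer (4096 GPU pages) is split into
       * DIV_ROUND_UP(16777216, 0x1fffff) = 9 packets of 7 dwords each,
       * which is why the ring is locked for num_loops * 7 + 18 dwords.
       */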
3703  
3704  /*
3705   * IB stuff
3706   */
3707  /**
3708   * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3709   *
3710   * @rdev: radeon_device pointer
3711   * @ib: radeon indirect buffer object
3712   *
3713   * Emits a DE (drawing engine) or CE (constant engine) IB
3714   * on the gfx ring.  IBs are usually generated by userspace
3715   * acceleration drivers and submitted to the kernel for
3716   * scheduling on the ring.  This function schedules the IB
3717   * on the gfx ring for execution by the GPU.
3718   */
3719  void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3720  {
3721  	struct radeon_ring *ring = &rdev->ring[ib->ring];
3722  	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3723  	u32 header, control = INDIRECT_BUFFER_VALID;
3724  
3725  	if (ib->is_const_ib) {
3726  		/* set switch buffer packet before const IB */
3727  		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3728  		radeon_ring_write(ring, 0);
3729  
3730  		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3731  	} else {
3732  		u32 next_rptr;
3733  		if (ring->rptr_save_reg) {
3734  			next_rptr = ring->wptr + 3 + 4;
3735  			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3736  			radeon_ring_write(ring, ((ring->rptr_save_reg -
3737  						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3738  			radeon_ring_write(ring, next_rptr);
3739  		} else if (rdev->wb.enabled) {
3740  			next_rptr = ring->wptr + 5 + 4;
3741  			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3742  			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3743  			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3744  			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3745  			radeon_ring_write(ring, next_rptr);
3746  		}
3747  
3748  		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3749  	}
3750  
3751  	control |= ib->length_dw | (vm_id << 24);
3752  
3753  	radeon_ring_write(ring, header);
3754  	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3755  	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3756  	radeon_ring_write(ring, control);
3757  }
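      /*
       * The final four writes form the actual packet: the IB header, the
       * GPU address split into low/high dwords (the low dword masked to
       * dword alignment), and a control word combining
       * INDIRECT_BUFFER_VALID, the IB length in dwords and the VM id in
       * the top byte.
       */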
3758  
3759  /**
3760   * cik_ib_test - basic gfx ring IB test
3761   *
3762   * @rdev: radeon_device pointer
3763   * @ring: radeon_ring structure holding ring information
3764   *
3765   * Allocate an IB and execute it on the gfx ring (CIK).
3766   * Provides a basic gfx ring test to verify that IBs are working.
3767   * Returns 0 on success, error on failure.
3768   */
3769  int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3770  {
3771  	struct radeon_ib ib;
3772  	uint32_t scratch;
3773  	uint32_t tmp = 0;
3774  	unsigned i;
3775  	int r;
3776  
3777  	r = radeon_scratch_get(rdev, &scratch);
3778  	if (r) {
3779  		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3780  		return r;
3781  	}
3782  	WREG32(scratch, 0xCAFEDEAD);
3783  	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3784  	if (r) {
3785  		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3786  		radeon_scratch_free(rdev, scratch);
3787  		return r;
3788  	}
3789  	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3790  	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3791  	ib.ptr[2] = 0xDEADBEEF;
3792  	ib.length_dw = 3;
3793  	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3794  	if (r) {
3795  		radeon_scratch_free(rdev, scratch);
3796  		radeon_ib_free(rdev, &ib);
3797  		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3798  		return r;
3799  	}
3800  	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3801  		RADEON_USEC_IB_TEST_TIMEOUT));
3802  	if (r < 0) {
3803  		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3804  		radeon_scratch_free(rdev, scratch);
3805  		radeon_ib_free(rdev, &ib);
3806  		return r;
3807  	} else if (r == 0) {
3808  		DRM_ERROR("radeon: fence wait timed out.\n");
3809  		radeon_scratch_free(rdev, scratch);
3810  		radeon_ib_free(rdev, &ib);
3811  		return -ETIMEDOUT;
3812  	}
3813  	r = 0;
3814  	for (i = 0; i < rdev->usec_timeout; i++) {
3815  		tmp = RREG32(scratch);
3816  		if (tmp == 0xDEADBEEF)
3817  			break;
3818  		udelay(1);
3819  	}
3820  	if (i < rdev->usec_timeout) {
3821  		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3822  	} else {
3823  		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3824  			  scratch, tmp);
3825  		r = -EINVAL;
3826  	}
3827  	radeon_scratch_free(rdev, scratch);
3828  	radeon_ib_free(rdev, &ib);
3829  	return r;
3830  }
3831  
3832  /*
3833   * CP.
3834   * On CIK, gfx and compute now have independent command processors.
3835   *
3836   * GFX
3837   * Gfx consists of a single ring and can process both gfx jobs and
3838   * compute jobs.  The gfx CP consists of three microengines (ME):
3839   * PFP - Pre-Fetch Parser
3840   * ME - Micro Engine
3841   * CE - Constant Engine
3842   * The PFP and ME make up what is considered the Drawing Engine (DE).
3843   * The CE is an asynchronous engine used for updating buffer descriptors
3844   * used by the DE so that they can be loaded into cache in parallel
3845   * while the DE is processing state update packets.
3846   *
3847   * Compute
3848   * The compute CP consists of two microengines (ME):
3849   * MEC1 - Compute MicroEngine 1
3850   * MEC2 - Compute MicroEngine 2
3851   * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3852   * The queues are exposed to userspace and are programmed directly
3853   * by the compute runtime.
3854   */
3855  /**
3856   * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3857   *
3858   * @rdev: radeon_device pointer
3859   * @enable: enable or disable the MEs
3860   *
3861   * Halts or unhalts the gfx MEs.
3862   */
3863  static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3864  {
3865  	if (enable)
3866  		WREG32(CP_ME_CNTL, 0);
3867  	else {
3868  		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3869  			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3870  		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3871  		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3872  	}
3873  	udelay(50);
3874  }
3875  
3876  /**
3877   * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3878   *
3879   * @rdev: radeon_device pointer
3880   *
3881   * Loads the gfx PFP, ME, and CE ucode.
3882   * Returns 0 for success, -EINVAL if the ucode is not available.
3883   */
3884  static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3885  {
3886  	int i;
3887  
3888  	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3889  		return -EINVAL;
3890  
3891  	cik_cp_gfx_enable(rdev, false);
3892  
3893  	if (rdev->new_fw) {
3894  		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3895  			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3896  		const struct gfx_firmware_header_v1_0 *ce_hdr =
3897  			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3898  		const struct gfx_firmware_header_v1_0 *me_hdr =
3899  			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3900  		const __le32 *fw_data;
3901  		u32 fw_size;
3902  
3903  		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3904  		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3905  		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3906  
3907  		/* PFP */
3908  		fw_data = (const __le32 *)
3909  			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3910  		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3911  		WREG32(CP_PFP_UCODE_ADDR, 0);
3912  		for (i = 0; i < fw_size; i++)
3913  			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3914  		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3915  
3916  		/* CE */
3917  		fw_data = (const __le32 *)
3918  			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3919  		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3920  		WREG32(CP_CE_UCODE_ADDR, 0);
3921  		for (i = 0; i < fw_size; i++)
3922  			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3923  		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3924  
3925  		/* ME */
3926  		fw_data = (const __le32 *)
3927  			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3928  		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3929  		WREG32(CP_ME_RAM_WADDR, 0);
3930  		for (i = 0; i < fw_size; i++)
3931  			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3932  		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3933  		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3934  	} else {
3935  		const __be32 *fw_data;
3936  
3937  		/* PFP */
3938  		fw_data = (const __be32 *)rdev->pfp_fw->data;
3939  		WREG32(CP_PFP_UCODE_ADDR, 0);
3940  		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3941  			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3942  		WREG32(CP_PFP_UCODE_ADDR, 0);
3943  
3944  		/* CE */
3945  		fw_data = (const __be32 *)rdev->ce_fw->data;
3946  		WREG32(CP_CE_UCODE_ADDR, 0);
3947  		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3948  			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3949  		WREG32(CP_CE_UCODE_ADDR, 0);
3950  
3951  		/* ME */
3952  		fw_data = (const __be32 *)rdev->me_fw->data;
3953  		WREG32(CP_ME_RAM_WADDR, 0);
3954  		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3955  			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3956  		WREG32(CP_ME_RAM_WADDR, 0);
3957  	}
3958  
3959  	return 0;
3960  }
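      /*
       * Upload pattern used above for each ME: reset the ucode address
       * register to 0, stream the image one dword at a time through the
       * matching DATA register (the address auto-increments), then write
       * the ADDR register once more (0 for legacy images, the ucode
       * version for the new firmware layout) to finish the load.
       */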
3961  
3962  /**
3963   * cik_cp_gfx_start - start the gfx ring
3964   *
3965   * @rdev: radeon_device pointer
3966   *
3967   * Enables the ring and loads the clear state context and other
3968   * packets required to init the ring.
3969   * Returns 0 for success, error for failure.
3970   */
3971  static int cik_cp_gfx_start(struct radeon_device *rdev)
3972  {
3973  	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3974  	int r, i;
3975  
3976  	/* init the CP */
3977  	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3978  	WREG32(CP_ENDIAN_SWAP, 0);
3979  	WREG32(CP_DEVICE_ID, 1);
3980  
3981  	cik_cp_gfx_enable(rdev, true);
3982  
3983  	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3984  	if (r) {
3985  		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3986  		return r;
3987  	}
3988  
3989  	/* init the CE partitions.  CE only used for gfx on CIK */
3990  	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3991  	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3992  	radeon_ring_write(ring, 0x8000);
3993  	radeon_ring_write(ring, 0x8000);
3994  
3995  	/* setup clear context state */
3996  	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3997  	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3998  
3999  	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4000  	radeon_ring_write(ring, 0x80000000);
4001  	radeon_ring_write(ring, 0x80000000);
4002  
4003  	for (i = 0; i < cik_default_size; i++)
4004  		radeon_ring_write(ring, cik_default_state[i]);
4005  
4006  	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4007  	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4008  
4009  	/* set clear context state */
4010  	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4011  	radeon_ring_write(ring, 0);
4012  
4013  	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4014  	radeon_ring_write(ring, 0x00000316);
4015  	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4016  	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4017  
4018  	radeon_ring_unlock_commit(rdev, ring, false);
4019  
4020  	return 0;
4021  }
4022  
4023  /**
4024   * cik_cp_gfx_fini - stop the gfx ring
4025   *
4026   * @rdev: radeon_device pointer
4027   *
4028   * Stop the gfx ring and tear down the driver ring
4029   * info.
4030   */
4031  static void cik_cp_gfx_fini(struct radeon_device *rdev)
4032  {
4033  	cik_cp_gfx_enable(rdev, false);
4034  	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4035  }
4036  
4037  /**
4038   * cik_cp_gfx_resume - setup the gfx ring buffer registers
4039   *
4040   * @rdev: radeon_device pointer
4041   *
4042   * Program the location and size of the gfx ring buffer
4043   * and test it to make sure it's working.
4044   * Returns 0 for success, error for failure.
4045   */
4046  static int cik_cp_gfx_resume(struct radeon_device *rdev)
4047  {
4048  	struct radeon_ring *ring;
4049  	u32 tmp;
4050  	u32 rb_bufsz;
4051  	u64 rb_addr;
4052  	int r;
4053  
4054  	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4055  	if (rdev->family != CHIP_HAWAII)
4056  		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4057  
4058  	/* Set the write pointer delay */
4059  	WREG32(CP_RB_WPTR_DELAY, 0);
4060  
4061  	/* set the RB to use vmid 0 */
4062  	WREG32(CP_RB_VMID, 0);
4063  
4064  	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4065  
4066  	/* ring 0 - compute and gfx */
4067  	/* Set ring buffer size */
4068  	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4069  	rb_bufsz = order_base_2(ring->ring_size / 8);
4070  	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4071  #ifdef __BIG_ENDIAN
4072  	tmp |= BUF_SWAP_32BIT;
4073  #endif
4074  	WREG32(CP_RB0_CNTL, tmp);
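      	/*
      	 * Worked example: a 1 MiB ring gives rb_bufsz =
      	 * order_base_2(1048576 / 8) = 17; the size field is expressed as
      	 * log2 of 8-byte units.
      	 */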
4075  
4076  	/* Initialize the ring buffer's read and write pointers */
4077  	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4078  	ring->wptr = 0;
4079  	WREG32(CP_RB0_WPTR, ring->wptr);
4080  
4081  	/* set the wb address whether it's enabled or not */
4082  	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4083  	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4084  
4085  	/* scratch register shadowing is no longer supported */
4086  	WREG32(SCRATCH_UMSK, 0);
4087  
4088  	if (!rdev->wb.enabled)
4089  		tmp |= RB_NO_UPDATE;
4090  
4091  	mdelay(1);
4092  	WREG32(CP_RB0_CNTL, tmp);
4093  
4094  	rb_addr = ring->gpu_addr >> 8;
4095  	WREG32(CP_RB0_BASE, rb_addr);
4096  	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4097  
4098  	/* start the ring */
4099  	cik_cp_gfx_start(rdev);
4100  	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4101  	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102  	if (r) {
4103  		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4104  		return r;
4105  	}
4106  
4107  	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4108  		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4109  
4110  	return 0;
4111  }
4112  
4113  u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4114  		     struct radeon_ring *ring)
4115  {
4116  	u32 rptr;
4117  
4118  	if (rdev->wb.enabled)
4119  		rptr = rdev->wb.wb[ring->rptr_offs/4];
4120  	else
4121  		rptr = RREG32(CP_RB0_RPTR);
4122  
4123  	return rptr;
4124  }
4125  
4126  u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4127  		     struct radeon_ring *ring)
4128  {
4129  	return RREG32(CP_RB0_WPTR);
4130  }
4131  
4132  void cik_gfx_set_wptr(struct radeon_device *rdev,
4133  		      struct radeon_ring *ring)
4134  {
4135  	WREG32(CP_RB0_WPTR, ring->wptr);
4136  	(void)RREG32(CP_RB0_WPTR);
4137  }
4138  
4139  u32 cik_compute_get_rptr(struct radeon_device *rdev,
4140  			 struct radeon_ring *ring)
4141  {
4142  	u32 rptr;
4143  
4144  	if (rdev->wb.enabled) {
4145  		rptr = rdev->wb.wb[ring->rptr_offs/4];
4146  	} else {
4147  		mutex_lock(&rdev->srbm_mutex);
4148  		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4149  		rptr = RREG32(CP_HQD_PQ_RPTR);
4150  		cik_srbm_select(rdev, 0, 0, 0, 0);
4151  		mutex_unlock(&rdev->srbm_mutex);
4152  	}
4153  
4154  	return rptr;
4155  }
4156  
4157  u32 cik_compute_get_wptr(struct radeon_device *rdev,
4158  			 struct radeon_ring *ring)
4159  {
4160  	u32 wptr;
4161  
4162  	if (rdev->wb.enabled) {
4163  		/* XXX check if swapping is necessary on BE */
4164  		wptr = rdev->wb.wb[ring->wptr_offs/4];
4165  	} else {
4166  		mutex_lock(&rdev->srbm_mutex);
4167  		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4168  		wptr = RREG32(CP_HQD_PQ_WPTR);
4169  		cik_srbm_select(rdev, 0, 0, 0, 0);
4170  		mutex_unlock(&rdev->srbm_mutex);
4171  	}
4172  
4173  	return wptr;
4174  }
4175  
4176  void cik_compute_set_wptr(struct radeon_device *rdev,
4177  			  struct radeon_ring *ring)
4178  {
4179  	/* XXX check if swapping is necessary on BE */
4180  	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4181  	WDOORBELL32(ring->doorbell_index, ring->wptr);
4182  }
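      /*
       * Unlike the gfx ring, compute queues are kicked via doorbells: the
       * new wptr is mirrored into the writeback page and the doorbell
       * write notifies the MEC that work is available on this queue.
       */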
4183  
4184  static void cik_compute_stop(struct radeon_device *rdev,
4185  			     struct radeon_ring *ring)
4186  {
4187  	u32 j, tmp;
4188  
4189  	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4190  	/* Disable wptr polling. */
4191  	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4192  	tmp &= ~WPTR_POLL_EN;
4193  	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4194  	/* Disable HQD. */
4195  	if (RREG32(CP_HQD_ACTIVE) & 1) {
4196  		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4197  		for (j = 0; j < rdev->usec_timeout; j++) {
4198  			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4199  				break;
4200  			udelay(1);
4201  		}
4202  		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4203  		WREG32(CP_HQD_PQ_RPTR, 0);
4204  		WREG32(CP_HQD_PQ_WPTR, 0);
4205  	}
4206  	cik_srbm_select(rdev, 0, 0, 0, 0);
4207  }
4208  
4209  /**
4210   * cik_cp_compute_enable - enable/disable the compute CP MEs
4211   *
4212   * @rdev: radeon_device pointer
4213   * @enable: enable or disable the MEs
4214   *
4215   * Halts or unhalts the compute MEs.
4216   */
4217  static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4218  {
4219  	if (enable)
4220  		WREG32(CP_MEC_CNTL, 0);
4221  	else {
4222  		/*
4223  		 * To make hibernation reliable we need to clear compute ring
4224  		 * configuration before halting the compute ring.
4225  		 */
4226  		mutex_lock(&rdev->srbm_mutex);
4227  		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4228  		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4229  		mutex_unlock(&rdev->srbm_mutex);
4230  
4231  		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4232  		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4233  		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4234  	}
4235  	udelay(50);
4236  }
4237  
4238  /**
4239   * cik_cp_compute_load_microcode - load the compute CP ME ucode
4240   *
4241   * @rdev: radeon_device pointer
4242   *
4243   * Loads the compute MEC1&2 ucode.
4244   * Returns 0 for success, -EINVAL if the ucode is not available.
4245   */
4246  static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4247  {
4248  	int i;
4249  
4250  	if (!rdev->mec_fw)
4251  		return -EINVAL;
4252  
4253  	cik_cp_compute_enable(rdev, false);
4254  
4255  	if (rdev->new_fw) {
4256  		const struct gfx_firmware_header_v1_0 *mec_hdr =
4257  			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4258  		const __le32 *fw_data;
4259  		u32 fw_size;
4260  
4261  		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4262  
4263  		/* MEC1 */
4264  		fw_data = (const __le32 *)
4265  			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4266  		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4267  		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4268  		for (i = 0; i < fw_size; i++)
4269  			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4270  		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4271  
4272  		/* MEC2 */
4273  		if (rdev->family == CHIP_KAVERI) {
4274  			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4275  				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4276  
4277  			fw_data = (const __le32 *)
4278  				(rdev->mec2_fw->data +
4279  				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4280  			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4281  			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4282  			for (i = 0; i < fw_size; i++)
4283  				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4284  			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4285  		}
4286  	} else {
4287  		const __be32 *fw_data;
4288  
4289  		/* MEC1 */
4290  		fw_data = (const __be32 *)rdev->mec_fw->data;
4291  		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4292  		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4293  			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4294  		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4295  
4296  		if (rdev->family == CHIP_KAVERI) {
4297  			/* MEC2 */
4298  			fw_data = (const __be32 *)rdev->mec_fw->data;
4299  			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4300  			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4301  				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4302  			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4303  		}
4304  	}
4305  
4306  	return 0;
4307  }
4308  
4309  /**
4310   * cik_cp_compute_start - start the compute queues
4311   *
4312   * @rdev: radeon_device pointer
4313   *
4314   * Enable the compute queues.
4315   * Returns 0 for success, error for failure.
4316   */
4317  static int cik_cp_compute_start(struct radeon_device *rdev)
4318  {
4319  	cik_cp_compute_enable(rdev, true);
4320  
4321  	return 0;
4322  }
4323  
4324  /**
4325   * cik_cp_compute_fini - stop the compute queues
4326   *
4327   * @rdev: radeon_device pointer
4328   *
4329   * Stop the compute queues and tear down the driver queue
4330   * info.
4331   */
4332  static void cik_cp_compute_fini(struct radeon_device *rdev)
4333  {
4334  	int i, idx, r;
4335  
4336  	cik_cp_compute_enable(rdev, false);
4337  
4338  	for (i = 0; i < 2; i++) {
4339  		if (i == 0)
4340  			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4341  		else
4342  			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4343  
4344  		if (rdev->ring[idx].mqd_obj) {
4345  			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4346  			if (unlikely(r != 0))
4347  				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4348  
4349  			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4350  			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4351  
4352  			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4353  			rdev->ring[idx].mqd_obj = NULL;
4354  		}
4355  	}
4356  }
4357  
4358  static void cik_mec_fini(struct radeon_device *rdev)
4359  {
4360  	int r;
4361  
4362  	if (rdev->mec.hpd_eop_obj) {
4363  		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4364  		if (unlikely(r != 0))
4365  			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4366  		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4367  		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4368  
4369  		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4370  		rdev->mec.hpd_eop_obj = NULL;
4371  	}
4372  }
4373  
4374  #define MEC_HPD_SIZE 2048
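      /*
       * Each compute pipe gets MEC_HPD_SIZE * 2 bytes of EOP buffer in
       * GTT, so cik_mec_init() below allocates
       * num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes, e.g.
       * 2 MECs * 4 pipes * 4 KiB = 32 KiB on Kaveri.
       */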
4375  
4376  static int cik_mec_init(struct radeon_device *rdev)
4377  {
4378  	int r;
4379  	u32 *hpd;
4380  
4381  	/*
4382  	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4383  	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4384  	 */
4385  	if (rdev->family == CHIP_KAVERI)
4386  		rdev->mec.num_mec = 2;
4387  	else
4388  		rdev->mec.num_mec = 1;
4389  	rdev->mec.num_pipe = 4;
4390  	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4391  
4392  	if (rdev->mec.hpd_eop_obj == NULL) {
4393  		r = radeon_bo_create(rdev,
4394  				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4395  				     PAGE_SIZE, true,
4396  				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4397  				     &rdev->mec.hpd_eop_obj);
4398  		if (r) {
4399  			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4400  			return r;
4401  		}
4402  	}
4403  
4404  	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4405  	if (unlikely(r != 0)) {
4406  		cik_mec_fini(rdev);
4407  		return r;
4408  	}
4409  	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4410  			  &rdev->mec.hpd_eop_gpu_addr);
4411  	if (r) {
4412  		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4413  		cik_mec_fini(rdev);
4414  		return r;
4415  	}
4416  	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4417  	if (r) {
4418  		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4419  		cik_mec_fini(rdev);
4420  		return r;
4421  	}
4422  
4423  	/* clear the EOP memory; not sure if the hardware requires this */
4424  	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4425  
4426  	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4427  	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4428  
4429  	return 0;
4430  }
4431  
4432  struct hqd_registers
4433  {
4434  	u32 cp_mqd_base_addr;
4435  	u32 cp_mqd_base_addr_hi;
4436  	u32 cp_hqd_active;
4437  	u32 cp_hqd_vmid;
4438  	u32 cp_hqd_persistent_state;
4439  	u32 cp_hqd_pipe_priority;
4440  	u32 cp_hqd_queue_priority;
4441  	u32 cp_hqd_quantum;
4442  	u32 cp_hqd_pq_base;
4443  	u32 cp_hqd_pq_base_hi;
4444  	u32 cp_hqd_pq_rptr;
4445  	u32 cp_hqd_pq_rptr_report_addr;
4446  	u32 cp_hqd_pq_rptr_report_addr_hi;
4447  	u32 cp_hqd_pq_wptr_poll_addr;
4448  	u32 cp_hqd_pq_wptr_poll_addr_hi;
4449  	u32 cp_hqd_pq_doorbell_control;
4450  	u32 cp_hqd_pq_wptr;
4451  	u32 cp_hqd_pq_control;
4452  	u32 cp_hqd_ib_base_addr;
4453  	u32 cp_hqd_ib_base_addr_hi;
4454  	u32 cp_hqd_ib_rptr;
4455  	u32 cp_hqd_ib_control;
4456  	u32 cp_hqd_iq_timer;
4457  	u32 cp_hqd_iq_rptr;
4458  	u32 cp_hqd_dequeue_request;
4459  	u32 cp_hqd_dma_offload;
4460  	u32 cp_hqd_sema_cmd;
4461  	u32 cp_hqd_msg_type;
4462  	u32 cp_hqd_atomic0_preop_lo;
4463  	u32 cp_hqd_atomic0_preop_hi;
4464  	u32 cp_hqd_atomic1_preop_lo;
4465  	u32 cp_hqd_atomic1_preop_hi;
4466  	u32 cp_hqd_hq_scheduler0;
4467  	u32 cp_hqd_hq_scheduler1;
4468  	u32 cp_mqd_control;
4469  };
4470  
4471  struct bonaire_mqd
4472  {
4473  	u32 header;
4474  	u32 dispatch_initiator;
4475  	u32 dimensions[3];
4476  	u32 start_idx[3];
4477  	u32 num_threads[3];
4478  	u32 pipeline_stat_enable;
4479  	u32 perf_counter_enable;
4480  	u32 pgm[2];
4481  	u32 tba[2];
4482  	u32 tma[2];
4483  	u32 pgm_rsrc[2];
4484  	u32 vmid;
4485  	u32 resource_limits;
4486  	u32 static_thread_mgmt01[2];
4487  	u32 tmp_ring_size;
4488  	u32 static_thread_mgmt23[2];
4489  	u32 restart[3];
4490  	u32 thread_trace_enable;
4491  	u32 reserved1;
4492  	u32 user_data[16];
4493  	u32 vgtcs_invoke_count[2];
4494  	struct hqd_registers queue_state;
4495  	u32 dequeue_cntr;
4496  	u32 interrupt_queue[64];
4497  };
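      /*
       * The MQD (memory queue descriptor) is the CP's persistent in-memory
       * image of a compute queue; queue_state mirrors the CP_HQD_*
       * registers so queue state can be saved and restored from memory.
       * The layout must match what the MEC microcode expects, hence the
       * fixed header value written in cik_cp_compute_resume() below.
       */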
4498  
4499  /**
4500   * cik_cp_compute_resume - setup the compute queue registers
4501   *
4502   * @rdev: radeon_device pointer
4503   *
4504   * Program the compute queues and test them to make sure they
4505   * are working.
4506   * Returns 0 for success, error for failure.
4507   */
4508  static int cik_cp_compute_resume(struct radeon_device *rdev)
4509  {
4510  	int r, i, j, idx;
4511  	u32 tmp;
4512  	bool use_doorbell = true;
4513  	u64 hqd_gpu_addr;
4514  	u64 mqd_gpu_addr;
4515  	u64 eop_gpu_addr;
4516  	u64 wb_gpu_addr;
4517  	u32 *buf;
4518  	struct bonaire_mqd *mqd;
4519  
4520  	r = cik_cp_compute_start(rdev);
4521  	if (r)
4522  		return r;
4523  
4524  	/* fix up chicken bits */
4525  	tmp = RREG32(CP_CPF_DEBUG);
4526  	tmp |= (1 << 23);
4527  	WREG32(CP_CPF_DEBUG, tmp);
4528  
4529  	/* init the pipes */
4530  	mutex_lock(&rdev->srbm_mutex);
4531  
4532  	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4533  		int me = (i < 4) ? 1 : 2;
4534  		int pipe = (i < 4) ? i : (i - 4);
4535  
4536  		cik_srbm_select(rdev, me, pipe, 0, 0);
4537  
4538  		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4539  		/* write the EOP addr */
4540  		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4541  		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4542  
4543  		/* set the VMID assigned */
4544  		WREG32(CP_HPD_EOP_VMID, 0);
4545  
4546  		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4547  		tmp = RREG32(CP_HPD_EOP_CONTROL);
4548  		tmp &= ~EOP_SIZE_MASK;
4549  		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4550  		WREG32(CP_HPD_EOP_CONTROL, tmp);
4551  
4552  	}
4553  	cik_srbm_select(rdev, 0, 0, 0, 0);
4554  	mutex_unlock(&rdev->srbm_mutex);
4555  
4556  	/* init the queues.  Just two for now. */
4557  	for (i = 0; i < 2; i++) {
4558  		if (i == 0)
4559  			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4560  		else
4561  			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4562  
4563  		if (rdev->ring[idx].mqd_obj == NULL) {
4564  			r = radeon_bo_create(rdev,
4565  					     sizeof(struct bonaire_mqd),
4566  					     PAGE_SIZE, true,
4567  					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4568  					     NULL, &rdev->ring[idx].mqd_obj);
4569  			if (r) {
4570  				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4571  				return r;
4572  			}
4573  		}
4574  
4575  		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4576  		if (unlikely(r != 0)) {
4577  			cik_cp_compute_fini(rdev);
4578  			return r;
4579  		}
4580  		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4581  				  &mqd_gpu_addr);
4582  		if (r) {
4583  			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4584  			cik_cp_compute_fini(rdev);
4585  			return r;
4586  		}
4587  		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4588  		if (r) {
4589  			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4590  			cik_cp_compute_fini(rdev);
4591  			return r;
4592  		}
4593  
4594  		/* init the mqd struct */
4595  		memset(buf, 0, sizeof(struct bonaire_mqd));
4596  
4597  		mqd = (struct bonaire_mqd *)buf;
4598  		mqd->header = 0xC0310800;
4599  		mqd->static_thread_mgmt01[0] = 0xffffffff;
4600  		mqd->static_thread_mgmt01[1] = 0xffffffff;
4601  		mqd->static_thread_mgmt23[0] = 0xffffffff;
4602  		mqd->static_thread_mgmt23[1] = 0xffffffff;
4603  
4604  		mutex_lock(&rdev->srbm_mutex);
4605  		cik_srbm_select(rdev, rdev->ring[idx].me,
4606  				rdev->ring[idx].pipe,
4607  				rdev->ring[idx].queue, 0);
4608  
4609  		/* disable wptr polling */
4610  		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4611  		tmp &= ~WPTR_POLL_EN;
4612  		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4613  
4614  		/* enable doorbell? */
4615  		mqd->queue_state.cp_hqd_pq_doorbell_control =
4616  			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4617  		if (use_doorbell)
4618  			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4619  		else
4620  			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4621  		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4622  		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4623  
4624  		/* disable the queue if it's active */
4625  		mqd->queue_state.cp_hqd_dequeue_request = 0;
4626  		mqd->queue_state.cp_hqd_pq_rptr = 0;
4627  		mqd->queue_state.cp_hqd_pq_wptr = 0;
4628  		if (RREG32(CP_HQD_ACTIVE) & 1) {
4629  			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4630  			for (j = 0; j < rdev->usec_timeout; j++) {
4631  				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4632  					break;
4633  				udelay(1);
4634  			}
4635  			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4636  			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4637  			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4638  		}
4639  
4640  		/* set the pointer to the MQD */
4641  		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4642  		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4643  		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4644  		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4645  		/* set MQD vmid to 0 */
4646  		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4647  		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4648  		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4649  
4650  		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4651  		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4652  		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4653  		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4654  		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4655  		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4656  
4657  		/* set up the HQD, this is similar to CP_RB0_CNTL */
4658  		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4659  		mqd->queue_state.cp_hqd_pq_control &=
4660  			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4661  
4662  		mqd->queue_state.cp_hqd_pq_control |=
4663  			order_base_2(rdev->ring[idx].ring_size / 8);
4664  		mqd->queue_state.cp_hqd_pq_control |=
4665  			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
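		/* e.g., a 1 MiB ring programs a queue-size field of
		 * order_base_2(1M / 8) = 17; a 4 KiB RADEON_GPU_PAGE_SIZE
		 * programs an rptr-block field of order_base_2(512) = 9
		 */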
4666  #ifdef __BIG_ENDIAN
4667  		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4668  #endif
4669  		mqd->queue_state.cp_hqd_pq_control &=
4670  			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4671  		mqd->queue_state.cp_hqd_pq_control |=
4672  			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4673  		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4674  
4675  		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4676  		if (i == 0)
4677  			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4678  		else
4679  			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4680  		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4681  		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4682  		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4683  		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4684  		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4685  
4686  		/* set the wb address whether it's enabled or not */
4687  		if (i == 0)
4688  			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4689  		else
4690  			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4691  		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4692  		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4693  			upper_32_bits(wb_gpu_addr) & 0xffff;
4694  		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4695  		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4696  		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4697  		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4698  
4699  		/* enable the doorbell if requested */
4700  		if (use_doorbell) {
4701  			mqd->queue_state.cp_hqd_pq_doorbell_control =
4702  				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4703  			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4704  			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4705  				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4706  			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4707  			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4708  				~(DOORBELL_SOURCE | DOORBELL_HIT);
4709  
4710  		} else {
4711  			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4712  		}
4713  		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4714  		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4715  
4716  		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4717  		rdev->ring[idx].wptr = 0;
4718  		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4719  		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4720  		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4721  
4722  		/* set the vmid for the queue */
4723  		mqd->queue_state.cp_hqd_vmid = 0;
4724  		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4725  
4726  		/* activate the queue */
4727  		mqd->queue_state.cp_hqd_active = 1;
4728  		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4729  
4730  		cik_srbm_select(rdev, 0, 0, 0, 0);
4731  		mutex_unlock(&rdev->srbm_mutex);
4732  
4733  		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4734  		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4735  
4736  		rdev->ring[idx].ready = true;
4737  		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4738  		if (r)
4739  			rdev->ring[idx].ready = false;
4740  	}
4741  
4742  	return 0;
4743  }
4744  
4745  static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4746  {
4747  	cik_cp_gfx_enable(rdev, enable);
4748  	cik_cp_compute_enable(rdev, enable);
4749  }
4750  
4751  static int cik_cp_load_microcode(struct radeon_device *rdev)
4752  {
4753  	int r;
4754  
4755  	r = cik_cp_gfx_load_microcode(rdev);
4756  	if (r)
4757  		return r;
4758  	r = cik_cp_compute_load_microcode(rdev);
4759  	if (r)
4760  		return r;
4761  
4762  	return 0;
4763  }
4764  
4765  static void cik_cp_fini(struct radeon_device *rdev)
4766  {
4767  	cik_cp_gfx_fini(rdev);
4768  	cik_cp_compute_fini(rdev);
4769  }
4770  
4771  static int cik_cp_resume(struct radeon_device *rdev)
4772  {
4773  	int r;
4774  
4775  	cik_enable_gui_idle_interrupt(rdev, false);
4776  
4777  	r = cik_cp_load_microcode(rdev);
4778  	if (r)
4779  		return r;
4780  
4781  	r = cik_cp_gfx_resume(rdev);
4782  	if (r)
4783  		return r;
4784  	r = cik_cp_compute_resume(rdev);
4785  	if (r)
4786  		return r;
4787  
4788  	cik_enable_gui_idle_interrupt(rdev, true);
4789  
4790  	return 0;
4791  }
4792  
4793  static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4794  {
4795  	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4796  		RREG32(GRBM_STATUS));
4797  	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4798  		RREG32(GRBM_STATUS2));
4799  	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4800  		RREG32(GRBM_STATUS_SE0));
4801  	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4802  		RREG32(GRBM_STATUS_SE1));
4803  	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4804  		RREG32(GRBM_STATUS_SE2));
4805  	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4806  		RREG32(GRBM_STATUS_SE3));
4807  	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4808  		RREG32(SRBM_STATUS));
4809  	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4810  		RREG32(SRBM_STATUS2));
4811  	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4812  		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4813  	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4814  		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4815  	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4816  	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4817  		 RREG32(CP_STALLED_STAT1));
4818  	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4819  		 RREG32(CP_STALLED_STAT2));
4820  	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4821  		 RREG32(CP_STALLED_STAT3));
4822  	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4823  		 RREG32(CP_CPF_BUSY_STAT));
4824  	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4825  		 RREG32(CP_CPF_STALLED_STAT1));
4826  	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4827  	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4828  	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4829  		 RREG32(CP_CPC_STALLED_STAT1));
4830  	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4831  }
4832  
4833  /**
4834   * cik_gpu_check_soft_reset - check which blocks are busy
4835   *
4836   * @rdev: radeon_device pointer
4837   *
4838   * Check which blocks are busy and return the relevant reset
4839   * mask to be used by cik_gpu_soft_reset().
4840   * Returns a mask of the blocks to be reset.
4841   */
4842  u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4843  {
4844  	u32 reset_mask = 0;
4845  	u32 tmp;
4846  
4847  	/* GRBM_STATUS */
4848  	tmp = RREG32(GRBM_STATUS);
4849  	if (tmp & (PA_BUSY | SC_BUSY |
4850  		   BCI_BUSY | SX_BUSY |
4851  		   TA_BUSY | VGT_BUSY |
4852  		   DB_BUSY | CB_BUSY |
4853  		   GDS_BUSY | SPI_BUSY |
4854  		   IA_BUSY | IA_BUSY_NO_DMA))
4855  		reset_mask |= RADEON_RESET_GFX;
4856  
4857  	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4858  		reset_mask |= RADEON_RESET_CP;
4859  
4860  	/* GRBM_STATUS2 */
4861  	tmp = RREG32(GRBM_STATUS2);
4862  	if (tmp & RLC_BUSY)
4863  		reset_mask |= RADEON_RESET_RLC;
4864  
4865  	/* SDMA0_STATUS_REG */
4866  	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4867  	if (!(tmp & SDMA_IDLE))
4868  		reset_mask |= RADEON_RESET_DMA;
4869  
4870  	/* SDMA1_STATUS_REG */
4871  	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4872  	if (!(tmp & SDMA_IDLE))
4873  		reset_mask |= RADEON_RESET_DMA1;
4874  
4875  	/* SRBM_STATUS2 */
4876  	tmp = RREG32(SRBM_STATUS2);
4877  	if (tmp & SDMA_BUSY)
4878  		reset_mask |= RADEON_RESET_DMA;
4879  
4880  	if (tmp & SDMA1_BUSY)
4881  		reset_mask |= RADEON_RESET_DMA1;
4882  
4883  	/* SRBM_STATUS */
4884  	tmp = RREG32(SRBM_STATUS);
4885  
4886  	if (tmp & IH_BUSY)
4887  		reset_mask |= RADEON_RESET_IH;
4888  
4889  	if (tmp & SEM_BUSY)
4890  		reset_mask |= RADEON_RESET_SEM;
4891  
4892  	if (tmp & GRBM_RQ_PENDING)
4893  		reset_mask |= RADEON_RESET_GRBM;
4894  
4895  	if (tmp & VMC_BUSY)
4896  		reset_mask |= RADEON_RESET_VMC;
4897  
4898  	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4899  		   MCC_BUSY | MCD_BUSY))
4900  		reset_mask |= RADEON_RESET_MC;
4901  
4902  	if (evergreen_is_display_hung(rdev))
4903  		reset_mask |= RADEON_RESET_DISPLAY;
4904  
4905  	/* Skip MC reset as it's most likely not hung, just busy */
4906  	if (reset_mask & RADEON_RESET_MC) {
4907  		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4908  		reset_mask &= ~RADEON_RESET_MC;
4909  	}
4910  
4911  	return reset_mask;
4912  }
4913  
4914  /**
4915   * cik_gpu_soft_reset - soft reset GPU
4916   *
4917   * @rdev: radeon_device pointer
4918   * @reset_mask: mask of which blocks to reset
4919   *
4920   * Soft reset the blocks specified in @reset_mask.
4921   */
4922  static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4923  {
4924  	struct evergreen_mc_save save;
4925  	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4926  	u32 tmp;
4927  
4928  	if (reset_mask == 0)
4929  		return;
4930  
4931  	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4932  
4933  	cik_print_gpu_status_regs(rdev);
4934  	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4935  		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4936  	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4937  		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4938  
4939  	/* disable CG/PG */
4940  	cik_fini_pg(rdev);
4941  	cik_fini_cg(rdev);
4942  
4943  	/* stop the rlc */
4944  	cik_rlc_stop(rdev);
4945  
4946  	/* Disable GFX parsing/prefetching */
4947  	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4948  
4949  	/* Disable MEC parsing/prefetching */
4950  	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4951  
4952  	if (reset_mask & RADEON_RESET_DMA) {
4953  		/* sdma0 */
4954  		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4955  		tmp |= SDMA_HALT;
4956  		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4957  	}
4958  	if (reset_mask & RADEON_RESET_DMA1) {
4959  		/* sdma1 */
4960  		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4961  		tmp |= SDMA_HALT;
4962  		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4963  	}
4964  
4965  	evergreen_mc_stop(rdev, &save);
4966  	if (evergreen_mc_wait_for_idle(rdev)) {
4967  		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4968  	}
4969  
4970  	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4971  		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4972  
4973  	if (reset_mask & RADEON_RESET_CP) {
4974  		grbm_soft_reset |= SOFT_RESET_CP;
4975  
4976  		srbm_soft_reset |= SOFT_RESET_GRBM;
4977  	}
4978  
4979  	if (reset_mask & RADEON_RESET_DMA)
4980  		srbm_soft_reset |= SOFT_RESET_SDMA;
4981  
4982  	if (reset_mask & RADEON_RESET_DMA1)
4983  		srbm_soft_reset |= SOFT_RESET_SDMA1;
4984  
4985  	if (reset_mask & RADEON_RESET_DISPLAY)
4986  		srbm_soft_reset |= SOFT_RESET_DC;
4987  
4988  	if (reset_mask & RADEON_RESET_RLC)
4989  		grbm_soft_reset |= SOFT_RESET_RLC;
4990  
4991  	if (reset_mask & RADEON_RESET_SEM)
4992  		srbm_soft_reset |= SOFT_RESET_SEM;
4993  
4994  	if (reset_mask & RADEON_RESET_IH)
4995  		srbm_soft_reset |= SOFT_RESET_IH;
4996  
4997  	if (reset_mask & RADEON_RESET_GRBM)
4998  		srbm_soft_reset |= SOFT_RESET_GRBM;
4999  
5000  	if (reset_mask & RADEON_RESET_VMC)
5001  		srbm_soft_reset |= SOFT_RESET_VMC;
5002  
5003  	if (!(rdev->flags & RADEON_IS_IGP)) {
5004  		if (reset_mask & RADEON_RESET_MC)
5005  			srbm_soft_reset |= SOFT_RESET_MC;
5006  	}
5007  
5008  	if (grbm_soft_reset) {
5009  		tmp = RREG32(GRBM_SOFT_RESET);
5010  		tmp |= grbm_soft_reset;
5011  		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5012  		WREG32(GRBM_SOFT_RESET, tmp);
5013  		tmp = RREG32(GRBM_SOFT_RESET);
5014  
5015  		udelay(50);
5016  
5017  		tmp &= ~grbm_soft_reset;
5018  		WREG32(GRBM_SOFT_RESET, tmp);
5019  		tmp = RREG32(GRBM_SOFT_RESET);
5020  	}
5021  
5022  	if (srbm_soft_reset) {
5023  		tmp = RREG32(SRBM_SOFT_RESET);
5024  		tmp |= srbm_soft_reset;
5025  		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5026  		WREG32(SRBM_SOFT_RESET, tmp);
5027  		tmp = RREG32(SRBM_SOFT_RESET);
5028  
5029  		udelay(50);
5030  
5031  		tmp &= ~srbm_soft_reset;
5032  		WREG32(SRBM_SOFT_RESET, tmp);
5033  		tmp = RREG32(SRBM_SOFT_RESET);
5034  	}
5035  
5036  	/* Wait a little for things to settle down */
5037  	udelay(50);
5038  
5039  	evergreen_mc_resume(rdev, &save);
5040  	udelay(50);
5041  
5042  	cik_print_gpu_status_regs(rdev);
5043  }
5044  
5045  struct kv_reset_save_regs {
5046  	u32 gmcon_reng_execute;
5047  	u32 gmcon_misc;
5048  	u32 gmcon_misc3;
5049  };
5050  
5051  static void kv_save_regs_for_reset(struct radeon_device *rdev,
5052  				   struct kv_reset_save_regs *save)
5053  {
5054  	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5055  	save->gmcon_misc = RREG32(GMCON_MISC);
5056  	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5057  
5058  	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5059  	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5060  						STCTRL_STUTTER_EN));
5061  }
5062  
5063  static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5064  				      struct kv_reset_save_regs *save)
5065  {
5066  	int i;
5067  
5068  	WREG32(GMCON_PGFSM_WRITE, 0);
5069  	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5070  
5071  	for (i = 0; i < 5; i++)
5072  		WREG32(GMCON_PGFSM_WRITE, 0);
5073  
5074  	WREG32(GMCON_PGFSM_WRITE, 0);
5075  	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5076  
5077  	for (i = 0; i < 5; i++)
5078  		WREG32(GMCON_PGFSM_WRITE, 0);
5079  
5080  	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5081  	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5082  
5083  	for (i = 0; i < 5; i++)
5084  		WREG32(GMCON_PGFSM_WRITE, 0);
5085  
5086  	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5087  	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5088  
5089  	for (i = 0; i < 5; i++)
5090  		WREG32(GMCON_PGFSM_WRITE, 0);
5091  
5092  	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5093  	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5094  
5095  	for (i = 0; i < 5; i++)
5096  		WREG32(GMCON_PGFSM_WRITE, 0);
5097  
5098  	WREG32(GMCON_PGFSM_WRITE, 0);
5099  	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5100  
5101  	for (i = 0; i < 5; i++)
5102  		WREG32(GMCON_PGFSM_WRITE, 0);
5103  
5104  	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5105  	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5106  
5107  	for (i = 0; i < 5; i++)
5108  		WREG32(GMCON_PGFSM_WRITE, 0);
5109  
5110  	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5111  	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5112  
5113  	for (i = 0; i < 5; i++)
5114  		WREG32(GMCON_PGFSM_WRITE, 0);
5115  
5116  	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5117  	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5118  
5119  	for (i = 0; i < 5; i++)
5120  		WREG32(GMCON_PGFSM_WRITE, 0);
5121  
5122  	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5123  	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5124  
5125  	for (i = 0; i < 5; i++)
5126  		WREG32(GMCON_PGFSM_WRITE, 0);
5127  
5128  	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5129  	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5130  
5131  	WREG32(GMCON_MISC3, save->gmcon_misc3);
5132  	WREG32(GMCON_MISC, save->gmcon_misc);
5133  	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5134  }
5135  
5136  static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5137  {
5138  	struct evergreen_mc_save save;
5139  	struct kv_reset_save_regs kv_save = { 0 };
5140  	u32 tmp, i;
5141  
5142  	dev_info(rdev->dev, "GPU pci config reset\n");
5143  
5144  	/* disable dpm? */
5145  
5146  	/* disable cg/pg */
5147  	cik_fini_pg(rdev);
5148  	cik_fini_cg(rdev);
5149  
5150  	/* Disable GFX parsing/prefetching */
5151  	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5152  
5153  	/* Disable MEC parsing/prefetching */
5154  	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5155  
5156  	/* sdma0 */
5157  	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5158  	tmp |= SDMA_HALT;
5159  	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5160  	/* sdma1 */
5161  	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5162  	tmp |= SDMA_HALT;
5163  	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5164  	/* XXX other engines? */
5165  
5166  	/* halt the rlc, disable cp internal ints */
5167  	cik_rlc_stop(rdev);
5168  
5169  	udelay(50);
5170  
5171  	/* disable mem access */
5172  	evergreen_mc_stop(rdev, &save);
5173  	if (evergreen_mc_wait_for_idle(rdev)) {
5174  		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5175  	}
5176  
5177  	if (rdev->flags & RADEON_IS_IGP)
5178  		kv_save_regs_for_reset(rdev, &kv_save);
5179  
5180  	/* disable BM */
5181  	pci_clear_master(rdev->pdev);
5182  	/* reset */
5183  	radeon_pci_config_reset(rdev);
5184  
5185  	udelay(100);
5186  
5187  	/* wait for asic to come out of reset */
5188  	for (i = 0; i < rdev->usec_timeout; i++) {
5189  		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5190  			break;
5191  		udelay(1);
5192  	}
5193  
5194  	/* does asic init need to be run first??? */
5195  	if (rdev->flags & RADEON_IS_IGP)
5196  		kv_restore_regs_for_reset(rdev, &kv_save);
5197  }
5198  
5199  /**
5200   * cik_asic_reset - soft reset GPU
5201   *
5202   * @rdev: radeon_device pointer
5203   * @hard: force hard reset
5204   *
5205   * Look up which blocks are hung and attempt
5206   * to reset them.
5207   * Returns 0 for success.
5208   */
5209  int cik_asic_reset(struct radeon_device *rdev, bool hard)
5210  {
5211  	u32 reset_mask;
5212  
5213  	if (hard) {
5214  		cik_gpu_pci_config_reset(rdev);
5215  		return 0;
5216  	}
5217  
5218  	reset_mask = cik_gpu_check_soft_reset(rdev);
5219  
5220  	if (reset_mask)
5221  		r600_set_bios_scratch_engine_hung(rdev, true);
5222  
5223  	/* try soft reset */
5224  	cik_gpu_soft_reset(rdev, reset_mask);
5225  
5226  	reset_mask = cik_gpu_check_soft_reset(rdev);
5227  
5228  	/* try pci config reset */
5229  	if (reset_mask && radeon_hard_reset)
5230  		cik_gpu_pci_config_reset(rdev);
5231  
5232  	reset_mask = cik_gpu_check_soft_reset(rdev);
5233  
5234  	if (!reset_mask)
5235  		r600_set_bios_scratch_engine_hung(rdev, false);
5236  
5237  	return 0;
5238  }
5239  
5240  /**
5241   * cik_gfx_is_lockup - check if the 3D engine is locked up
5242   *
5243   * @rdev: radeon_device pointer
5244   * @ring: radeon_ring structure holding ring information
5245   *
5246   * Check if the 3D engine is locked up (CIK).
5247   * Returns true if the engine is locked, false if not.
5248   */
5249  bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5250  {
5251  	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5252  
5253  	if (!(reset_mask & (RADEON_RESET_GFX |
5254  			    RADEON_RESET_COMPUTE |
5255  			    RADEON_RESET_CP))) {
5256  		radeon_ring_lockup_update(rdev, ring);
5257  		return false;
5258  	}
5259  	return radeon_ring_test_lockup(rdev, ring);
5260  }
5261  
5262  /* MC */
5263  /**
5264   * cik_mc_program - program the GPU memory controller
5265   *
5266   * @rdev: radeon_device pointer
5267   *
5268   * Set the location of vram, gart, and AGP in the GPU's
5269   * physical address space (CIK).
5270   */
5271  static void cik_mc_program(struct radeon_device *rdev)
5272  {
5273  	struct evergreen_mc_save save;
5274  	u32 tmp;
5275  	int i, j;
5276  
5277  	/* Initialize HDP */
5278  	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5279  		WREG32((0x2c14 + j), 0x00000000);
5280  		WREG32((0x2c18 + j), 0x00000000);
5281  		WREG32((0x2c1c + j), 0x00000000);
5282  		WREG32((0x2c20 + j), 0x00000000);
5283  		WREG32((0x2c24 + j), 0x00000000);
5284  	}
5285  	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5286  
5287  	evergreen_mc_stop(rdev, &save);
5288  	if (radeon_mc_wait_for_idle(rdev)) {
5289  		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5290  	}
5291  	/* Lockout access through VGA aperture*/
5292  	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5293  	/* Update configuration */
5294  	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5295  	       rdev->mc.vram_start >> 12);
5296  	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5297  	       rdev->mc.vram_end >> 12);
5298  	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5299  	       rdev->vram_scratch.gpu_addr >> 12);
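	/* the FB range is packed in 16 MiB (1 << 24) granularity:
	 * bits [31:16] hold the end, bits [15:0] the start
	 */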
5300  	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5301  	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5302  	WREG32(MC_VM_FB_LOCATION, tmp);
5303  	/* XXX double check these! */
5304  	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5305  	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5306  	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5307  	WREG32(MC_VM_AGP_BASE, 0);
5308  	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5309  	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5310  	if (radeon_mc_wait_for_idle(rdev)) {
5311  		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5312  	}
5313  	evergreen_mc_resume(rdev, &save);
5314  	/* we need to own VRAM, so turn off the VGA renderer here
5315  	 * to stop it overwriting our objects */
5316  	rv515_vga_render_disable(rdev);
5317  }
5318  
5319  /**
5320   * cik_mc_init - initialize the memory controller driver params
5321   *
5322   * @rdev: radeon_device pointer
5323   *
5324   * Look up the amount of vram, vram width, and decide how to place
5325   * vram and gart within the GPU's physical address space (CIK).
5326   * Returns 0 for success.
5327   */
5328  static int cik_mc_init(struct radeon_device *rdev)
5329  {
5330  	u32 tmp;
5331  	int chansize, numchan;
5332  
5333  	/* Get VRAM information */
5334  	rdev->mc.vram_is_ddr = true;
5335  	tmp = RREG32(MC_ARB_RAMCFG);
5336  	if (tmp & CHANSIZE_MASK) {
5337  		chansize = 64;
5338  	} else {
5339  		chansize = 32;
5340  	}
5341  	tmp = RREG32(MC_SHARED_CHMAP);
5342  	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5343  	case 0:
5344  	default:
5345  		numchan = 1;
5346  		break;
5347  	case 1:
5348  		numchan = 2;
5349  		break;
5350  	case 2:
5351  		numchan = 4;
5352  		break;
5353  	case 3:
5354  		numchan = 8;
5355  		break;
5356  	case 4:
5357  		numchan = 3;
5358  		break;
5359  	case 5:
5360  		numchan = 6;
5361  		break;
5362  	case 6:
5363  		numchan = 10;
5364  		break;
5365  	case 7:
5366  		numchan = 12;
5367  		break;
5368  	case 8:
5369  		numchan = 16;
5370  		break;
5371  	}
5372  	rdev->mc.vram_width = numchan * chansize;
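	/* e.g., 4 channels x 64 bits/channel = a 256-bit effective bus */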
5373  	/* Could aper size report 0? */
5374  	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5375  	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5376  	/* size in MB */
5377  	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5378  	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5379  	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5380  	si_vram_gtt_location(rdev, &rdev->mc);
5381  	radeon_update_bandwidth_info(rdev);
5382  
5383  	return 0;
5384  }
5385  
5386  /*
5387   * GART
5388   * VMID 0 maps the physical GPU addresses used by the kernel.
5389   * VMIDs 1-15 are used for userspace clients and are handled
5390   * by the radeon vm/hsa code.
5391   */
5392  /**
5393   * cik_pcie_gart_tlb_flush - gart tlb flush callback
5394   *
5395   * @rdev: radeon_device pointer
5396   *
5397   * Flush the TLB for the VMID 0 page table (CIK).
5398   */
5399  void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5400  {
5401  	/* flush hdp cache */
5402  	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5403  
5404  	/* bits 0-15 are the VM contexts 0-15 */
5405  	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5406  }
5407  
5408  /**
5409   * cik_pcie_gart_enable - gart enable
5410   *
5411   * @rdev: radeon_device pointer
5412   *
5413   * This sets up the TLBs, programs the page tables for VMID0,
5414   * sets up the hw for VMIDs 1-15 which are allocated on
5415   * demand, and sets up the global locations for the LDS, GDS,
5416   * and GPUVM for FSA64 clients (CIK).
5417   * Returns 0 for success, errors for failure.
5418   */
5419  static int cik_pcie_gart_enable(struct radeon_device *rdev)
5420  {
5421  	int r, i;
5422  
5423  	if (rdev->gart.robj == NULL) {
5424  		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5425  		return -EINVAL;
5426  	}
5427  	r = radeon_gart_table_vram_pin(rdev);
5428  	if (r)
5429  		return r;
5430  	/* Setup TLB control */
5431  	WREG32(MC_VM_MX_L1_TLB_CNTL,
5432  	       (0xA << 7) |
5433  	       ENABLE_L1_TLB |
5434  	       ENABLE_L1_FRAGMENT_PROCESSING |
5435  	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5436  	       ENABLE_ADVANCED_DRIVER_MODEL |
5437  	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5438  	/* Setup L2 cache */
5439  	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5440  	       ENABLE_L2_FRAGMENT_PROCESSING |
5441  	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5442  	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5443  	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5444  	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5445  	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5446  	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5447  	       BANK_SELECT(4) |
5448  	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5449  	/* setup context0 */
5450  	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5451  	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5452  	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5453  	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5454  			(u32)(rdev->dummy_page.addr >> 12));
5455  	WREG32(VM_CONTEXT0_CNTL2, 0);
5456  	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5457  				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5458  
5459  	WREG32(0x15D4, 0);
5460  	WREG32(0x15D8, 0);
5461  	WREG32(0x15DC, 0);
5462  
5463  	/* restore context1-15 */
5464  	/* set vm size, must be a multiple of 4 */
5465  	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5466  	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5467  	for (i = 1; i < 16; i++) {
5468  		if (i < 8)
5469  			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5470  			       rdev->vm_manager.saved_table_addr[i]);
5471  		else
5472  			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5473  			       rdev->vm_manager.saved_table_addr[i]);
5474  	}
5475  
5476  	/* enable context1-15 */
5477  	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5478  	       (u32)(rdev->dummy_page.addr >> 12));
5479  	WREG32(VM_CONTEXT1_CNTL2, 4);
5480  	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5481  				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5482  				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5483  				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5484  				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5485  				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5486  				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5487  				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5488  				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5489  				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5490  				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5491  				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5492  				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5493  				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5494  
5495  	if (rdev->family == CHIP_KAVERI) {
5496  		u32 tmp = RREG32(CHUB_CONTROL);
5497  		tmp &= ~BYPASS_VM;
5498  		WREG32(CHUB_CONTROL, tmp);
5499  	}
5500  
5501  	/* XXX SH_MEM regs */
5502  	/* where to put LDS, scratch, GPUVM in FSA64 space */
5503  	mutex_lock(&rdev->srbm_mutex);
5504  	for (i = 0; i < 16; i++) {
5505  		cik_srbm_select(rdev, 0, 0, 0, i);
5506  		/* CP and shaders */
5507  		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5508  		WREG32(SH_MEM_APE1_BASE, 1);
5509  		WREG32(SH_MEM_APE1_LIMIT, 0);
5510  		WREG32(SH_MEM_BASES, 0);
5511  		/* SDMA GFX */
5512  		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5513  		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5514  		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5515  		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5516  		/* XXX SDMA RLC - todo */
5517  	}
5518  	cik_srbm_select(rdev, 0, 0, 0, 0);
5519  	mutex_unlock(&rdev->srbm_mutex);
5520  
5521  	cik_pcie_gart_tlb_flush(rdev);
5522  	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5523  		 (unsigned)(rdev->mc.gtt_size >> 20),
5524  		 (unsigned long long)rdev->gart.table_addr);
5525  	rdev->gart.ready = true;
5526  	return 0;
5527  }
5528  
5529  /**
5530   * cik_pcie_gart_disable - gart disable
5531   *
5532   * @rdev: radeon_device pointer
5533   *
5534   * This disables all VM page tables (CIK).
5535   */
5536  static void cik_pcie_gart_disable(struct radeon_device *rdev)
5537  {
5538  	unsigned i;
5539  
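	/* stash the per-VMID page table bases so cik_pcie_gart_enable()
	 * can restore contexts 1-15 after a reset or resume
	 */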
5540  	for (i = 1; i < 16; ++i) {
5541  		uint32_t reg;
5542  		if (i < 8)
5543  			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5544  		else
5545  			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5546  		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5547  	}
5548  
5549  	/* Disable all tables */
5550  	WREG32(VM_CONTEXT0_CNTL, 0);
5551  	WREG32(VM_CONTEXT1_CNTL, 0);
5552  	/* Setup TLB control */
5553  	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5554  	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5555  	/* Setup L2 cache */
5556  	WREG32(VM_L2_CNTL,
5557  	       ENABLE_L2_FRAGMENT_PROCESSING |
5558  	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5559  	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5560  	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5561  	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5562  	WREG32(VM_L2_CNTL2, 0);
5563  	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5564  	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5565  	radeon_gart_table_vram_unpin(rdev);
5566  }
5567  
5568  /**
5569   * cik_pcie_gart_fini - vm fini callback
5570   *
5571   * @rdev: radeon_device pointer
5572   *
5573   * Tears down the driver GART/VM setup (CIK).
5574   */
5575  static void cik_pcie_gart_fini(struct radeon_device *rdev)
5576  {
5577  	cik_pcie_gart_disable(rdev);
5578  	radeon_gart_table_vram_free(rdev);
5579  	radeon_gart_fini(rdev);
5580  }
5581  
5582  /* vm parser */
5583  /**
5584   * cik_ib_parse - vm ib_parse callback
5585   *
5586   * @rdev: radeon_device pointer
5587   * @ib: indirect buffer pointer
5588   *
5589   * CIK uses hw IB checking so this is a nop (CIK).
5590   */
5591  int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5592  {
5593  	return 0;
5594  }
5595  
5596  /*
5597   * vm
5598   * VMID 0 maps the physical GPU addresses used by the kernel.
5599   * VMIDs 1-15 are used for userspace clients and are handled
5600   * by the radeon vm/hsa code.
5601   */
5602  /**
5603   * cik_vm_init - cik vm init callback
5604   *
5605   * @rdev: radeon_device pointer
5606   *
5607   * Inits cik specific vm parameters (number of VMs, base of vram for
5608   * VMIDs 1-15) (CIK).
5609   * Returns 0 for success.
5610   */
5611  int cik_vm_init(struct radeon_device *rdev)
5612  {
5613  	/*
5614  	 * number of VMs
5615  	 * VMID 0 is reserved for System
5616  	 * radeon graphics/compute will use VMIDs 1-15
5617  	 */
5618  	rdev->vm_manager.nvm = 16;
5619  	/* base offset of vram pages */
5620  	if (rdev->flags & RADEON_IS_IGP) {
5621  		u64 tmp = RREG32(MC_VM_FB_OFFSET);
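		/* the register counts in 4 MiB units; shifting by 22
		 * converts it to a byte offset */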
5622  		tmp <<= 22;
5623  		rdev->vm_manager.vram_base_offset = tmp;
5624  	} else
5625  		rdev->vm_manager.vram_base_offset = 0;
5626  
5627  	return 0;
5628  }
5629  
5630  /**
5631   * cik_vm_fini - cik vm fini callback
5632   *
5633   * @rdev: radeon_device pointer
5634   *
5635   * Tear down any asic specific VM setup (CIK).
5636   */
5637  void cik_vm_fini(struct radeon_device *rdev)
5638  {
5639  }
5640  
5641  /**
5642   * cik_vm_decode_fault - print human readable fault info
5643   *
5644   * @rdev: radeon_device pointer
5645   * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5646   * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5647   * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5648   *
5649   * Print human readable fault information (CIK).
5650   */
5651  static void cik_vm_decode_fault(struct radeon_device *rdev,
5652  				u32 status, u32 addr, u32 mc_client)
5653  {
5654  	u32 mc_id;
5655  	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5656  	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5657  	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5658  		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
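	/* mc_client packs a four-character ASCII client tag, MSB first */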
5659  
5660  	if (rdev->family == CHIP_HAWAII)
5661  		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5662  	else
5663  		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5664  
5665  	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5666  	       protections, vmid, addr,
5667  	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5668  	       block, mc_client, mc_id);
5669  }
5670  
5671  /*
5672   * cik_vm_flush - cik vm flush using the CP
5673   *
5674   * Update the page table base and flush the VM TLB
5675   * using the CP (CIK).
5676   */
5677  void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5678  		  unsigned vm_id, uint64_t pd_addr)
5679  {
5680  	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5681  
5682  	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5683  	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5684  				 WRITE_DATA_DST_SEL(0)));
5685  	if (vm_id < 8) {
5686  		radeon_ring_write(ring,
5687  				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5688  	} else {
5689  		radeon_ring_write(ring,
5690  				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5691  	}
5692  	radeon_ring_write(ring, 0);
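	/* the page table base is programmed in 4 KiB units, hence >> 12 */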
5693  	radeon_ring_write(ring, pd_addr >> 12);
5694  
5695  	/* update SH_MEM_* regs */
5696  	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5697  	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5698  				 WRITE_DATA_DST_SEL(0)));
5699  	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5700  	radeon_ring_write(ring, 0);
5701  	radeon_ring_write(ring, VMID(vm_id));
5702  
5703  	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5704  	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5705  				 WRITE_DATA_DST_SEL(0)));
5706  	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5707  	radeon_ring_write(ring, 0);
5708  
5709  	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5710  	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5711  	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5712  	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5713  
5714  	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5715  	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5716  				 WRITE_DATA_DST_SEL(0)));
5717  	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5718  	radeon_ring_write(ring, 0);
5719  	radeon_ring_write(ring, VMID(0));
5720  
5721  	/* HDP flush */
5722  	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5723  
5724  	/* bits 0-15 are the VM contexts 0-15 */
5725  	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5726  	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5727  				 WRITE_DATA_DST_SEL(0)));
5728  	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5729  	radeon_ring_write(ring, 0);
5730  	radeon_ring_write(ring, 1 << vm_id);
5731  
5732  	/* wait for the invalidate to complete */
5733  	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5734  	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5735  				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5736  				 WAIT_REG_MEM_ENGINE(0))); /* me */
5737  	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5738  	radeon_ring_write(ring, 0);
5739  	radeon_ring_write(ring, 0); /* ref */
5740  	radeon_ring_write(ring, 0); /* mask */
5741  	radeon_ring_write(ring, 0x20); /* poll interval */
5742  
5743  	/* compute doesn't have PFP */
5744  	if (usepfp) {
5745  		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5746  		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5747  		radeon_ring_write(ring, 0x0);
5748  	}
5749  }
5750  
5751  /*
5752   * RLC
5753   * The RLC is a multi-purpose microengine that handles a
5754   * variety of functions, the most important of which is
5755   * the interrupt controller.
5756   */
5757  static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5758  					  bool enable)
5759  {
5760  	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5761  
5762  	if (enable)
5763  		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5764  	else
5765  		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5766  	WREG32(CP_INT_CNTL_RING0, tmp);
5767  }
5768  
5769  static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5770  {
5771  	u32 tmp;
5772  
5773  	tmp = RREG32(RLC_LB_CNTL);
5774  	if (enable)
5775  		tmp |= LOAD_BALANCE_ENABLE;
5776  	else
5777  		tmp &= ~LOAD_BALANCE_ENABLE;
5778  	WREG32(RLC_LB_CNTL, tmp);
5779  }
5780  
5781  static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5782  {
5783  	u32 i, j, k;
5784  	u32 mask;
5785  
5786  	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5787  		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5788  			cik_select_se_sh(rdev, i, j);
5789  			for (k = 0; k < rdev->usec_timeout; k++) {
5790  				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5791  					break;
5792  				udelay(1);
5793  			}
5794  		}
5795  	}
5796  	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5797  
5798  	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5799  	for (k = 0; k < rdev->usec_timeout; k++) {
5800  		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5801  			break;
5802  		udelay(1);
5803  	}
5804  }
5805  
5806  static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5807  {
5808  	u32 tmp;
5809  
5810  	tmp = RREG32(RLC_CNTL);
5811  	if (tmp != rlc)
5812  		WREG32(RLC_CNTL, rlc);
5813  }
5814  
5815  static u32 cik_halt_rlc(struct radeon_device *rdev)
5816  {
5817  	u32 data, orig;
5818  
5819  	orig = data = RREG32(RLC_CNTL);
5820  
5821  	if (data & RLC_ENABLE) {
5822  		u32 i;
5823  
5824  		data &= ~RLC_ENABLE;
5825  		WREG32(RLC_CNTL, data);
5826  
5827  		for (i = 0; i < rdev->usec_timeout; i++) {
5828  			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5829  				break;
5830  			udelay(1);
5831  		}
5832  
5833  		cik_wait_for_rlc_serdes(rdev);
5834  	}
5835  
5836  	return orig;
5837  }
5838  
5839  void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5840  {
5841  	u32 tmp, i, mask;
5842  
5843  	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5844  	WREG32(RLC_GPR_REG2, tmp);
5845  
5846  	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5847  	for (i = 0; i < rdev->usec_timeout; i++) {
5848  		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5849  			break;
5850  		udelay(1);
5851  	}
5852  
5853  	for (i = 0; i < rdev->usec_timeout; i++) {
5854  		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5855  			break;
5856  		udelay(1);
5857  	}
5858  }
5859  
5860  void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5861  {
5862  	u32 tmp;
5863  
5864  	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5865  	WREG32(RLC_GPR_REG2, tmp);
5866  }
5867  
5868  /**
5869   * cik_rlc_stop - stop the RLC ME
5870   *
5871   * @rdev: radeon_device pointer
5872   *
5873   * Halt the RLC ME (MicroEngine) (CIK).
5874   */
5875  static void cik_rlc_stop(struct radeon_device *rdev)
5876  {
5877  	WREG32(RLC_CNTL, 0);
5878  
5879  	cik_enable_gui_idle_interrupt(rdev, false);
5880  
5881  	cik_wait_for_rlc_serdes(rdev);
5882  }
5883  
5884  /**
5885   * cik_rlc_start - start the RLC ME
5886   *
5887   * @rdev: radeon_device pointer
5888   *
5889   * Unhalt the RLC ME (MicroEngine) (CIK).
5890   */
5891  static void cik_rlc_start(struct radeon_device *rdev)
5892  {
5893  	WREG32(RLC_CNTL, RLC_ENABLE);
5894  
5895  	cik_enable_gui_idle_interrupt(rdev, true);
5896  
5897  	udelay(50);
5898  }
5899  
5900  /**
5901   * cik_rlc_resume - setup the RLC hw
5902   *
5903   * @rdev: radeon_device pointer
5904   *
5905   * Initialize the RLC registers, load the ucode,
5906   * and start the RLC (CIK).
5907   * Returns 0 for success, -EINVAL if the ucode is not available.
5908   */
5909  static int cik_rlc_resume(struct radeon_device *rdev)
5910  {
5911  	u32 i, size, tmp;
5912  
5913  	if (!rdev->rlc_fw)
5914  		return -EINVAL;
5915  
5916  	cik_rlc_stop(rdev);
5917  
5918  	/* disable CG */
5919  	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
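	/* the 0xfffffffc mask clears the low two bits, i.e. the CGCG_EN and
	 * CGLS_EN enables also toggled in cik_enable_cgcg() */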
5920  	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5921  
5922  	si_rlc_reset(rdev);
5923  
5924  	cik_init_pg(rdev);
5925  
5926  	cik_init_cg(rdev);
5927  
5928  	WREG32(RLC_LB_CNTR_INIT, 0);
5929  	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5930  
5931  	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5932  	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5933  	WREG32(RLC_LB_PARAMS, 0x00600408);
5934  	WREG32(RLC_LB_CNTL, 0x80000004);
5935  
5936  	WREG32(RLC_MC_CNTL, 0);
5937  	WREG32(RLC_UCODE_CNTL, 0);
5938  
5939  	if (rdev->new_fw) {
5940  		const struct rlc_firmware_header_v1_0 *hdr =
5941  			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5942  		const __le32 *fw_data = (const __le32 *)
5943  			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5944  
5945  		radeon_ucode_print_rlc_hdr(&hdr->header);
5946  
5947  		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5948  		WREG32(RLC_GPM_UCODE_ADDR, 0);
5949  		for (i = 0; i < size; i++)
5950  			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5951  		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5952  	} else {
5953  		const __be32 *fw_data;
5954  
5955  		switch (rdev->family) {
5956  		case CHIP_BONAIRE:
5957  		case CHIP_HAWAII:
5958  		default:
5959  			size = BONAIRE_RLC_UCODE_SIZE;
5960  			break;
5961  		case CHIP_KAVERI:
5962  			size = KV_RLC_UCODE_SIZE;
5963  			break;
5964  		case CHIP_KABINI:
5965  			size = KB_RLC_UCODE_SIZE;
5966  			break;
5967  		case CHIP_MULLINS:
5968  			size = ML_RLC_UCODE_SIZE;
5969  			break;
5970  		}
5971  
5972  		fw_data = (const __be32 *)rdev->rlc_fw->data;
5973  		WREG32(RLC_GPM_UCODE_ADDR, 0);
5974  		for (i = 0; i < size; i++)
5975  			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5976  		WREG32(RLC_GPM_UCODE_ADDR, 0);
5977  	}
5978  
5979  	/* XXX - find out what chips support lbpw */
5980  	cik_enable_lbpw(rdev, false);
5981  
5982  	if (rdev->family == CHIP_BONAIRE)
5983  		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5984  
5985  	cik_rlc_start(rdev);
5986  
5987  	return 0;
5988  }
5989  
5990  static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5991  {
5992  	u32 data, orig, tmp, tmp2;
5993  
5994  	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5995  
5996  	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5997  		cik_enable_gui_idle_interrupt(rdev, true);
5998  
5999  		tmp = cik_halt_rlc(rdev);
6000  
6001  		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6002  		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6003  		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6004  		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6005  		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6006  
6007  		cik_update_rlc(rdev, tmp);
6008  
6009  		data |= CGCG_EN | CGLS_EN;
6010  	} else {
6011  		cik_enable_gui_idle_interrupt(rdev, false);
6012  
6013  		RREG32(CB_CGTT_SCLK_CTRL);
6014  		RREG32(CB_CGTT_SCLK_CTRL);
6015  		RREG32(CB_CGTT_SCLK_CTRL);
6016  		RREG32(CB_CGTT_SCLK_CTRL);
6017  
6018  		data &= ~(CGCG_EN | CGLS_EN);
6019  	}
6020  
6021  	if (orig != data)
6022  		WREG32(RLC_CGCG_CGLS_CTRL, data);
6023  
6024  }
6025  
6026  static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6027  {
6028  	u32 data, orig, tmp = 0;
6029  
6030  	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6031  		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6032  			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6033  				orig = data = RREG32(CP_MEM_SLP_CNTL);
6034  				data |= CP_MEM_LS_EN;
6035  				if (orig != data)
6036  					WREG32(CP_MEM_SLP_CNTL, data);
6037  			}
6038  		}
6039  
6040  		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6041  		data |= 0x00000001;
6042  		data &= 0xfffffffd;
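		/* i.e. set bit 0 and clear bit 1 of the MGCG override */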
6043  		if (orig != data)
6044  			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6045  
6046  		tmp = cik_halt_rlc(rdev);
6047  
6048  		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6049  		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6050  		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6051  		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6052  		WREG32(RLC_SERDES_WR_CTRL, data);
6053  
6054  		cik_update_rlc(rdev, tmp);
6055  
6056  		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6057  			orig = data = RREG32(CGTS_SM_CTRL_REG);
6058  			data &= ~SM_MODE_MASK;
6059  			data |= SM_MODE(0x2);
6060  			data |= SM_MODE_ENABLE;
6061  			data &= ~CGTS_OVERRIDE;
6062  			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6063  			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6064  				data &= ~CGTS_LS_OVERRIDE;
6065  			data &= ~ON_MONITOR_ADD_MASK;
6066  			data |= ON_MONITOR_ADD_EN;
6067  			data |= ON_MONITOR_ADD(0x96);
6068  			if (orig != data)
6069  				WREG32(CGTS_SM_CTRL_REG, data);
6070  		}
6071  	} else {
6072  		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6073  		data |= 0x00000003;
6074  		if (orig != data)
6075  			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6076  
6077  		data = RREG32(RLC_MEM_SLP_CNTL);
6078  		if (data & RLC_MEM_LS_EN) {
6079  			data &= ~RLC_MEM_LS_EN;
6080  			WREG32(RLC_MEM_SLP_CNTL, data);
6081  		}
6082  
6083  		data = RREG32(CP_MEM_SLP_CNTL);
6084  		if (data & CP_MEM_LS_EN) {
6085  			data &= ~CP_MEM_LS_EN;
6086  			WREG32(CP_MEM_SLP_CNTL, data);
6087  		}
6088  
6089  		orig = data = RREG32(CGTS_SM_CTRL_REG);
6090  		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6091  		if (orig != data)
6092  			WREG32(CGTS_SM_CTRL_REG, data);
6093  
6094  		tmp = cik_halt_rlc(rdev);
6095  
6096  		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6097  		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6098  		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6099  		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6100  		WREG32(RLC_SERDES_WR_CTRL, data);
6101  
6102  		cik_update_rlc(rdev, tmp);
6103  	}
6104  }
6105  
6106  static const u32 mc_cg_registers[] =
6107  {
6108  	MC_HUB_MISC_HUB_CG,
6109  	MC_HUB_MISC_SIP_CG,
6110  	MC_HUB_MISC_VM_CG,
6111  	MC_XPB_CLK_GAT,
6112  	ATC_MISC_CG,
6113  	MC_CITF_MISC_WR_CG,
6114  	MC_CITF_MISC_RD_CG,
6115  	MC_CITF_MISC_VM_CG,
6116  	VM_L2_CG,
6117  };
6118  
6119  static void cik_enable_mc_ls(struct radeon_device *rdev,
6120  			     bool enable)
6121  {
6122  	int i;
6123  	u32 orig, data;
6124  
6125  	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6126  		orig = data = RREG32(mc_cg_registers[i]);
6127  		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6128  			data |= MC_LS_ENABLE;
6129  		else
6130  			data &= ~MC_LS_ENABLE;
6131  		if (data != orig)
6132  			WREG32(mc_cg_registers[i], data);
6133  	}
6134  }
6135  
6136  static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6137  			       bool enable)
6138  {
6139  	int i;
6140  	u32 orig, data;
6141  
6142  	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6143  		orig = data = RREG32(mc_cg_registers[i]);
6144  		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6145  			data |= MC_CG_ENABLE;
6146  		else
6147  			data &= ~MC_CG_ENABLE;
6148  		if (data != orig)
6149  			WREG32(mc_cg_registers[i], data);
6150  	}
6151  }
6152  
6153  static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6154  				 bool enable)
6155  {
6156  	u32 orig, data;
6157  
6158  	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6159  		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6160  		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6161  	} else {
6162  		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6163  		data |= 0xff000000;
6164  		if (data != orig)
6165  			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6166  
6167  		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6168  		data |= 0xff000000;
6169  		if (data != orig)
6170  			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6171  	}
6172  }
6173  
6174  static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6175  				 bool enable)
6176  {
6177  	u32 orig, data;
6178  
6179  	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6180  		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6181  		data |= 0x100;
6182  		if (orig != data)
6183  			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6184  
6185  		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6186  		data |= 0x100;
6187  		if (orig != data)
6188  			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6189  	} else {
6190  		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6191  		data &= ~0x100;
6192  		if (orig != data)
6193  			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6194  
6195  		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6196  		data &= ~0x100;
6197  		if (orig != data)
6198  			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6199  	}
6200  }
6201  
6202  static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6203  				bool enable)
6204  {
6205  	u32 orig, data;
6206  
6207  	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
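		/* force all memory clock-gating bits [11:0] on; note the
		 * value read back below is discarded */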
6208  		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6209  		data = 0xfff;
6210  		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6211  
6212  		orig = data = RREG32(UVD_CGC_CTRL);
6213  		data |= DCM;
6214  		if (orig != data)
6215  			WREG32(UVD_CGC_CTRL, data);
6216  	} else {
6217  		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6218  		data &= ~0xfff;
6219  		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6220  
6221  		orig = data = RREG32(UVD_CGC_CTRL);
6222  		data &= ~DCM;
6223  		if (orig != data)
6224  			WREG32(UVD_CGC_CTRL, data);
6225  	}
6226  }
6227  
6228  static void cik_enable_bif_mgls(struct radeon_device *rdev,
6229  			       bool enable)
6230  {
6231  	u32 orig, data;
6232  
6233  	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6234  
6235  	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6236  		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6237  			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6238  	else
6239  		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6240  			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6241  
6242  	if (orig != data)
6243  		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6244  }
6245  
6246  static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6247  				bool enable)
6248  {
6249  	u32 orig, data;
6250  
6251  	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6252  
6253  	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6254  		data &= ~CLOCK_GATING_DIS;
6255  	else
6256  		data |= CLOCK_GATING_DIS;
6257  
6258  	if (orig != data)
6259  		WREG32(HDP_HOST_PATH_CNTL, data);
6260  }
6261  
6262  static void cik_enable_hdp_ls(struct radeon_device *rdev,
6263  			      bool enable)
6264  {
6265  	u32 orig, data;
6266  
6267  	orig = data = RREG32(HDP_MEM_POWER_LS);
6268  
6269  	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6270  		data |= HDP_LS_ENABLE;
6271  	else
6272  		data &= ~HDP_LS_ENABLE;
6273  
6274  	if (orig != data)
6275  		WREG32(HDP_MEM_POWER_LS, data);
6276  }
6277  
6278  void cik_update_cg(struct radeon_device *rdev,
6279  		   u32 block, bool enable)
6280  {
6281  
6282  	if (block & RADEON_CG_BLOCK_GFX) {
6283  		cik_enable_gui_idle_interrupt(rdev, false);
6284  		/* order matters! */
6285  		if (enable) {
6286  			cik_enable_mgcg(rdev, true);
6287  			cik_enable_cgcg(rdev, true);
6288  		} else {
6289  			cik_enable_cgcg(rdev, false);
6290  			cik_enable_mgcg(rdev, false);
6291  		}
6292  		cik_enable_gui_idle_interrupt(rdev, true);
6293  	}
6294  
6295  	if (block & RADEON_CG_BLOCK_MC) {
6296  		if (!(rdev->flags & RADEON_IS_IGP)) {
6297  			cik_enable_mc_mgcg(rdev, enable);
6298  			cik_enable_mc_ls(rdev, enable);
6299  		}
6300  	}
6301  
6302  	if (block & RADEON_CG_BLOCK_SDMA) {
6303  		cik_enable_sdma_mgcg(rdev, enable);
6304  		cik_enable_sdma_mgls(rdev, enable);
6305  	}
6306  
6307  	if (block & RADEON_CG_BLOCK_BIF) {
6308  		cik_enable_bif_mgls(rdev, enable);
6309  	}
6310  
6311  	if (block & RADEON_CG_BLOCK_UVD) {
6312  		if (rdev->has_uvd)
6313  			cik_enable_uvd_mgcg(rdev, enable);
6314  	}
6315  
6316  	if (block & RADEON_CG_BLOCK_HDP) {
6317  		cik_enable_hdp_mgcg(rdev, enable);
6318  		cik_enable_hdp_ls(rdev, enable);
6319  	}
6320  
6321  	if (block & RADEON_CG_BLOCK_VCE) {
6322  		vce_v2_0_enable_mgcg(rdev, enable);
6323  	}
6324  }
6325  
6326  static void cik_init_cg(struct radeon_device *rdev)
6327  {
6329  	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6330  
6331  	if (rdev->has_uvd)
6332  		si_init_uvd_internal_cg(rdev);
6333  
6334  	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6335  			     RADEON_CG_BLOCK_SDMA |
6336  			     RADEON_CG_BLOCK_BIF |
6337  			     RADEON_CG_BLOCK_UVD |
6338  			     RADEON_CG_BLOCK_HDP), true);
6339  }
6340  
6341  static void cik_fini_cg(struct radeon_device *rdev)
6342  {
6343  	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6344  			     RADEON_CG_BLOCK_SDMA |
6345  			     RADEON_CG_BLOCK_BIF |
6346  			     RADEON_CG_BLOCK_UVD |
6347  			     RADEON_CG_BLOCK_HDP), false);
6348  
6349  	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6350  }
6351  
6352  static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6353  					  bool enable)
6354  {
6355  	u32 data, orig;
6356  
6357  	orig = data = RREG32(RLC_PG_CNTL);
6358  	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6359  		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6360  	else
6361  		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6362  	if (orig != data)
6363  		WREG32(RLC_PG_CNTL, data);
6364  }
6365  
6366  static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6367  					  bool enable)
6368  {
6369  	u32 data, orig;
6370  
6371  	orig = data = RREG32(RLC_PG_CNTL);
6372  	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6373  		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6374  	else
6375  		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6376  	if (orig != data)
6377  		WREG32(RLC_PG_CNTL, data);
6378  }
6379  
6380  static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6381  {
6382  	u32 data, orig;
6383  
6384  	orig = data = RREG32(RLC_PG_CNTL);
6385  	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6386  		data &= ~DISABLE_CP_PG;
6387  	else
6388  		data |= DISABLE_CP_PG;
6389  	if (orig != data)
6390  		WREG32(RLC_PG_CNTL, data);
6391  }
6392  
6393  static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6394  {
6395  	u32 data, orig;
6396  
6397  	orig = data = RREG32(RLC_PG_CNTL);
6398  	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6399  		data &= ~DISABLE_GDS_PG;
6400  	else
6401  		data |= DISABLE_GDS_PG;
6402  	if (orig != data)
6403  		WREG32(RLC_PG_CNTL, data);
6404  }
6405  
6406  #define CP_ME_TABLE_SIZE    96
6407  #define CP_ME_TABLE_OFFSET  2048
6408  #define CP_MEC_TABLE_OFFSET 4096
6409  
6410  void cik_init_cp_pg_table(struct radeon_device *rdev)
6411  {
6412  	volatile u32 *dst_ptr;
6413  	int me, i, max_me = 4;
6414  	u32 bo_offset = 0;
6415  	u32 table_offset, table_size;
6416  
6417  	if (rdev->family == CHIP_KAVERI)
6418  		max_me = 5;
6419  
6420  	if (rdev->rlc.cp_table_ptr == NULL)
6421  		return;
6422  
6423  	/* write the cp table buffer */
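	/* For new-style firmware the jump table location comes from each
	 * ucode's gfx_firmware_header_v1_0 (jt_offset/jt_size, in dwords);
	 * legacy firmware uses the fixed CP_ME_TABLE_OFFSET,
	 * CP_MEC_TABLE_OFFSET and CP_ME_TABLE_SIZE defines above.
	 */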
6424  	dst_ptr = rdev->rlc.cp_table_ptr;
6425  	for (me = 0; me < max_me; me++) {
6426  		if (rdev->new_fw) {
6427  			const __le32 *fw_data;
6428  			const struct gfx_firmware_header_v1_0 *hdr;
6429  
6430  			if (me == 0) {
6431  				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6432  				fw_data = (const __le32 *)
6433  					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6434  				table_offset = le32_to_cpu(hdr->jt_offset);
6435  				table_size = le32_to_cpu(hdr->jt_size);
6436  			} else if (me == 1) {
6437  				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6438  				fw_data = (const __le32 *)
6439  					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6440  				table_offset = le32_to_cpu(hdr->jt_offset);
6441  				table_size = le32_to_cpu(hdr->jt_size);
6442  			} else if (me == 2) {
6443  				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6444  				fw_data = (const __le32 *)
6445  					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6446  				table_offset = le32_to_cpu(hdr->jt_offset);
6447  				table_size = le32_to_cpu(hdr->jt_size);
6448  			} else if (me == 3) {
6449  				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6450  				fw_data = (const __le32 *)
6451  					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6452  				table_offset = le32_to_cpu(hdr->jt_offset);
6453  				table_size = le32_to_cpu(hdr->jt_size);
6454  			} else {
6455  				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6456  				fw_data = (const __le32 *)
6457  					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6458  				table_offset = le32_to_cpu(hdr->jt_offset);
6459  				table_size = le32_to_cpu(hdr->jt_size);
6460  			}
6461  
6462  			for (i = 0; i < table_size; i++) {
6463  				dst_ptr[bo_offset + i] =
6464  					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6465  			}
6466  			bo_offset += table_size;
6467  		} else {
6468  			const __be32 *fw_data;
6469  			table_size = CP_ME_TABLE_SIZE;
6470  
6471  			if (me == 0) {
6472  				fw_data = (const __be32 *)rdev->ce_fw->data;
6473  				table_offset = CP_ME_TABLE_OFFSET;
6474  			} else if (me == 1) {
6475  				fw_data = (const __be32 *)rdev->pfp_fw->data;
6476  				table_offset = CP_ME_TABLE_OFFSET;
6477  			} else if (me == 2) {
6478  				fw_data = (const __be32 *)rdev->me_fw->data;
6479  				table_offset = CP_ME_TABLE_OFFSET;
6480  			} else {
6481  				fw_data = (const __be32 *)rdev->mec_fw->data;
6482  				table_offset = CP_MEC_TABLE_OFFSET;
6483  			}
6484  
6485  			for (i = 0; i < table_size; i++) {
6486  				dst_ptr[bo_offset + i] =
6487  					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6488  			}
6489  			bo_offset += table_size;
6490  		}
6491  	}
6492  }
6493  
6494  static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6495  				bool enable)
6496  {
6497  	u32 data, orig;
6498  
6499  	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6500  		orig = data = RREG32(RLC_PG_CNTL);
6501  		data |= GFX_PG_ENABLE;
6502  		if (orig != data)
6503  			WREG32(RLC_PG_CNTL, data);
6504  
6505  		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6506  		data |= AUTO_PG_EN;
6507  		if (orig != data)
6508  			WREG32(RLC_AUTO_PG_CTRL, data);
6509  	} else {
6510  		orig = data = RREG32(RLC_PG_CNTL);
6511  		data &= ~GFX_PG_ENABLE;
6512  		if (orig != data)
6513  			WREG32(RLC_PG_CNTL, data);
6514  
6515  		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6516  		data &= ~AUTO_PG_EN;
6517  		if (orig != data)
6518  			WREG32(RLC_AUTO_PG_CTRL, data);
6519  
6520  		data = RREG32(DB_RENDER_CONTROL);
6521  	}
6522  }
6523  
6524  static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6525  {
6526  	u32 mask = 0, tmp, tmp1;
6527  	int i;
6528  
6529  	cik_select_se_sh(rdev, se, sh);
6530  	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6531  	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6532  	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6533  
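	/* Both registers report disabled CUs in their upper 16 bits:
	 * CC_GC_SHADER_ARRAY_CONFIG for fuse-disabled CUs and
	 * GC_USER_SHADER_ARRAY_CONFIG for user-disabled CUs.  OR them
	 * together, shift the disabled mask down, then invert and trim it
	 * to max_cu_per_sh to get the active CU bitmap.
	 */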
6534  	tmp &= 0xffff0000;
6535  
6536  	tmp |= tmp1;
6537  	tmp >>= 16;
6538  
6539  	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6540  		mask <<= 1;
6541  		mask |= 1;
6542  	}
6543  
6544  	return (~tmp) & mask;
6545  }
6546  
6547  static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6548  {
6549  	u32 i, j, k, active_cu_number = 0;
6550  	u32 mask, counter, cu_bitmap;
6551  	u32 tmp = 0;
6552  
6553  	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6554  		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6555  			mask = 1;
6556  			cu_bitmap = 0;
6557  			counter = 0;
6558  			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6559  				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6560  					if (counter < 2)
6561  						cu_bitmap |= mask;
6562  					counter++;
6563  				}
6564  				mask <<= 1;
6565  			}
6566  
6567  			active_cu_number += counter;
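			/* pack this SH's always-on CU mask into RLC_PG_AO_CU_MASK:
			 * 16 bits per SE, 8 bits per SH */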
6568  			tmp |= (cu_bitmap << (i * 16 + j * 8));
6569  		}
6570  	}
6571  
6572  	WREG32(RLC_PG_AO_CU_MASK, tmp);
6573  
6574  	tmp = RREG32(RLC_MAX_PG_CU);
6575  	tmp &= ~MAX_PU_CU_MASK;
6576  	tmp |= MAX_PU_CU(active_cu_number);
6577  	WREG32(RLC_MAX_PG_CU, tmp);
6578  }
6579  
6580  static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6581  				       bool enable)
6582  {
6583  	u32 data, orig;
6584  
6585  	orig = data = RREG32(RLC_PG_CNTL);
6586  	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6587  		data |= STATIC_PER_CU_PG_ENABLE;
6588  	else
6589  		data &= ~STATIC_PER_CU_PG_ENABLE;
6590  	if (orig != data)
6591  		WREG32(RLC_PG_CNTL, data);
6592  }
6593  
6594  static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6595  					bool enable)
6596  {
6597  	u32 data, orig;
6598  
6599  	orig = data = RREG32(RLC_PG_CNTL);
6600  	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6601  		data |= DYN_PER_CU_PG_ENABLE;
6602  	else
6603  		data &= ~DYN_PER_CU_PG_ENABLE;
6604  	if (orig != data)
6605  		WREG32(RLC_PG_CNTL, data);
6606  }
6607  
6608  #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6609  #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6610  
6611  static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6612  {
6613  	u32 data, orig;
6614  	u32 i;
6615  
6616  	if (rdev->rlc.cs_data) {
6617  		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6618  		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6619  		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6620  		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6621  	} else {
6622  		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6623  		for (i = 0; i < 3; i++)
6624  			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6625  	}
6626  	if (rdev->rlc.reg_list) {
6627  		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6628  		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6629  			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6630  	}
6631  
6632  	orig = data = RREG32(RLC_PG_CNTL);
6633  	data |= GFX_PG_SRC;
6634  	if (orig != data)
6635  		WREG32(RLC_PG_CNTL, data);
6636  
6637  	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6638  	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6639  
6640  	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6641  	data &= ~IDLE_POLL_COUNT_MASK;
6642  	data |= IDLE_POLL_COUNT(0x60);
6643  	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6644  
6645  	data = 0x10101010;
6646  	WREG32(RLC_PG_DELAY, data);
6647  
6648  	data = RREG32(RLC_PG_DELAY_2);
6649  	data &= ~0xff;
6650  	data |= 0x3;
6651  	WREG32(RLC_PG_DELAY_2, data);
6652  
6653  	data = RREG32(RLC_AUTO_PG_CTRL);
6654  	data &= ~GRBM_REG_SGIT_MASK;
6655  	data |= GRBM_REG_SGIT(0x700);
6656  	WREG32(RLC_AUTO_PG_CTRL, data);
6658  }
6659  
6660  static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6661  {
6662  	cik_enable_gfx_cgpg(rdev, enable);
6663  	cik_enable_gfx_static_mgpg(rdev, enable);
6664  	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6665  }
6666  
6667  u32 cik_get_csb_size(struct radeon_device *rdev)
6668  {
6669  	u32 count = 0;
6670  	const struct cs_section_def *sect = NULL;
6671  	const struct cs_extent_def *ext = NULL;
6672  
6673  	if (rdev->rlc.cs_data == NULL)
6674  		return 0;
6675  
6676  	/* begin clear state */
6677  	count += 2;
6678  	/* context control state */
6679  	count += 3;
6680  
6681  	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6682  		for (ext = sect->section; ext->extent != NULL; ++ext) {
6683  			if (sect->id == SECT_CONTEXT)
6684  				count += 2 + ext->reg_count;
6685  			else
6686  				return 0;
6687  		}
6688  	}
6689  	/* pa_sc_raster_config/pa_sc_raster_config1 */
6690  	count += 4;
6691  	/* end clear state */
6692  	count += 2;
6693  	/* clear state */
6694  	count += 2;
6695  
6696  	return count;
6697  }
6698  
6699  void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6700  {
6701  	u32 count = 0, i;
6702  	const struct cs_section_def *sect = NULL;
6703  	const struct cs_extent_def *ext = NULL;
6704  
6705  	if (rdev->rlc.cs_data == NULL)
6706  		return;
6707  	if (buffer == NULL)
6708  		return;
6709  
6710  	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6711  	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6712  
6713  	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6714  	buffer[count++] = cpu_to_le32(0x80000000);
6715  	buffer[count++] = cpu_to_le32(0x80000000);
6716  
6717  	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6718  		for (ext = sect->section; ext->extent != NULL; ++ext) {
6719  			if (sect->id == SECT_CONTEXT) {
6720  				buffer[count++] =
6721  					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6722  				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6723  				for (i = 0; i < ext->reg_count; i++)
6724  					buffer[count++] = cpu_to_le32(ext->extent[i]);
6725  			} else {
6726  				return;
6727  			}
6728  		}
6729  	}
6730  
6731  	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6732  	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6733  	switch (rdev->family) {
6734  	case CHIP_BONAIRE:
6735  		buffer[count++] = cpu_to_le32(0x16000012);
6736  		buffer[count++] = cpu_to_le32(0x00000000);
6737  		break;
6738  	case CHIP_KAVERI:
6739  		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6740  		buffer[count++] = cpu_to_le32(0x00000000);
6741  		break;
6742  	case CHIP_KABINI:
6743  	case CHIP_MULLINS:
6744  		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6745  		buffer[count++] = cpu_to_le32(0x00000000);
6746  		break;
6747  	case CHIP_HAWAII:
6748  		buffer[count++] = cpu_to_le32(0x3a00161a);
6749  		buffer[count++] = cpu_to_le32(0x0000002e);
6750  		break;
6751  	default:
6752  		buffer[count++] = cpu_to_le32(0x00000000);
6753  		buffer[count++] = cpu_to_le32(0x00000000);
6754  		break;
6755  	}
6756  
6757  	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6758  	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6759  
6760  	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6761  	buffer[count++] = cpu_to_le32(0);
6762  }
6763  
6764  static void cik_init_pg(struct radeon_device *rdev)
6765  {
6766  	if (rdev->pg_flags) {
6767  		cik_enable_sck_slowdown_on_pu(rdev, true);
6768  		cik_enable_sck_slowdown_on_pd(rdev, true);
6769  		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6770  			cik_init_gfx_cgpg(rdev);
6771  			cik_enable_cp_pg(rdev, true);
6772  			cik_enable_gds_pg(rdev, true);
6773  		}
6774  		cik_init_ao_cu_mask(rdev);
6775  		cik_update_gfx_pg(rdev, true);
6776  	}
6777  }
6778  
6779  static void cik_fini_pg(struct radeon_device *rdev)
6780  {
6781  	if (rdev->pg_flags) {
6782  		cik_update_gfx_pg(rdev, false);
6783  		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6784  			cik_enable_cp_pg(rdev, false);
6785  			cik_enable_gds_pg(rdev, false);
6786  		}
6787  	}
6788  }
6789  
6790  /*
6791   * Interrupts
6792   * Starting with r6xx, interrupts are handled via a ring buffer.
6793   * Ring buffers are areas of GPU accessible memory that the GPU
6794   * writes interrupt vectors into and the host reads vectors out of.
6795   * There is a rptr (read pointer) that determines where the
6796   * host is currently reading, and a wptr (write pointer)
6797   * which determines where the GPU has written.  When the
6798   * pointers are equal, the ring is idle.  When the GPU
6799   * writes vectors to the ring buffer, it increments the
6800   * wptr.  When there is an interrupt, the host then starts
6801   * fetching commands and processing them until the pointers are
6802   * equal again at which point it updates the rptr.
6803   */
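
/* A minimal sketch of the consumer side of that protocol, assuming a
 * hypothetical process_iv_entry() helper for one 16-byte vector; the
 * real loop lives in cik_irq_process() below:
 *
 *	rptr = rdev->ih.rptr;
 *	wptr = cik_get_ih_wptr(rdev);
 *	while (rptr != wptr) {
 *		process_iv_entry(rdev, &rdev->ih.ring[rptr / 4]);
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	rdev->ih.rptr = rptr;
 *	WREG32(IH_RB_RPTR, rptr);
 */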
6804  
6805  /**
6806   * cik_enable_interrupts - Enable the interrupt ring buffer
6807   *
6808   * @rdev: radeon_device pointer
6809   *
6810   * Enable the interrupt ring buffer (CIK).
6811   */
6812  static void cik_enable_interrupts(struct radeon_device *rdev)
6813  {
6814  	u32 ih_cntl = RREG32(IH_CNTL);
6815  	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6816  
6817  	ih_cntl |= ENABLE_INTR;
6818  	ih_rb_cntl |= IH_RB_ENABLE;
6819  	WREG32(IH_CNTL, ih_cntl);
6820  	WREG32(IH_RB_CNTL, ih_rb_cntl);
6821  	rdev->ih.enabled = true;
6822  }
6823  
6824  /**
6825   * cik_disable_interrupts - Disable the interrupt ring buffer
6826   *
6827   * @rdev: radeon_device pointer
6828   *
6829   * Disable the interrupt ring buffer (CIK).
6830   */
6831  static void cik_disable_interrupts(struct radeon_device *rdev)
6832  {
6833  	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6834  	u32 ih_cntl = RREG32(IH_CNTL);
6835  
6836  	ih_rb_cntl &= ~IH_RB_ENABLE;
6837  	ih_cntl &= ~ENABLE_INTR;
6838  	WREG32(IH_RB_CNTL, ih_rb_cntl);
6839  	WREG32(IH_CNTL, ih_cntl);
6840  	/* set rptr, wptr to 0 */
6841  	WREG32(IH_RB_RPTR, 0);
6842  	WREG32(IH_RB_WPTR, 0);
6843  	rdev->ih.enabled = false;
6844  	rdev->ih.rptr = 0;
6845  }
6846  
6847  /**
6848   * cik_disable_interrupt_state - Disable all interrupt sources
6849   *
6850   * @rdev: radeon_device pointer
6851   *
6852   * Clear all interrupt enable bits used by the driver (CIK).
6853   */
6854  static void cik_disable_interrupt_state(struct radeon_device *rdev)
6855  {
6856  	u32 tmp;
6857  
6858  	/* gfx ring */
6859  	tmp = RREG32(CP_INT_CNTL_RING0) &
6860  		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6861  	WREG32(CP_INT_CNTL_RING0, tmp);
6862  	/* sdma */
6863  	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6864  	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6865  	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6866  	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6867  	/* compute queues */
6868  	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6869  	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6870  	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6871  	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6872  	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6873  	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6874  	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6875  	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6876  	/* grbm */
6877  	WREG32(GRBM_INT_CNTL, 0);
6878  	/* SRBM */
6879  	WREG32(SRBM_INT_CNTL, 0);
6880  	/* vline/vblank, etc. */
6881  	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6882  	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6883  	if (rdev->num_crtc >= 4) {
6884  		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6885  		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6886  	}
6887  	if (rdev->num_crtc >= 6) {
6888  		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6889  		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6890  	}
6891  	/* pflip */
6892  	if (rdev->num_crtc >= 2) {
6893  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6894  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6895  	}
6896  	if (rdev->num_crtc >= 4) {
6897  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6898  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6899  	}
6900  	if (rdev->num_crtc >= 6) {
6901  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6902  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6903  	}
6904  
6905  	/* dac hotplug */
6906  	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6907  
6908  	/* digital hotplug */
6909  	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6910  	WREG32(DC_HPD1_INT_CONTROL, tmp);
6911  	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6912  	WREG32(DC_HPD2_INT_CONTROL, tmp);
6913  	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6914  	WREG32(DC_HPD3_INT_CONTROL, tmp);
6915  	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6916  	WREG32(DC_HPD4_INT_CONTROL, tmp);
6917  	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6918  	WREG32(DC_HPD5_INT_CONTROL, tmp);
6919  	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6920  	WREG32(DC_HPD6_INT_CONTROL, tmp);
6922  }
6923  
6924  /**
6925   * cik_irq_init - init and enable the interrupt ring
6926   *
6927   * @rdev: radeon_device pointer
6928   *
6929   * Allocate a ring buffer for the interrupt controller,
6930   * enable the RLC, disable interrupts, set up the IH
6931   * ring buffer and enable it (CIK).
6932   * Called at device load and resume.
6933   * Returns 0 for success, errors for failure.
6934   */
6935  static int cik_irq_init(struct radeon_device *rdev)
6936  {
6937  	int ret = 0;
6938  	int rb_bufsz;
6939  	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6940  
6941  	/* allocate ring */
6942  	ret = r600_ih_ring_alloc(rdev);
6943  	if (ret)
6944  		return ret;
6945  
6946  	/* disable irqs */
6947  	cik_disable_interrupts(rdev);
6948  
6949  	/* init rlc */
6950  	ret = cik_rlc_resume(rdev);
6951  	if (ret) {
6952  		r600_ih_ring_fini(rdev);
6953  		return ret;
6954  	}
6955  
6956  	/* setup interrupt control */
6957  	/* set dummy read address to dummy page address */
6958  	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6959  	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6960  	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6961  	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6962  	 */
6963  	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6964  	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6965  	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6966  	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6967  
6968  	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6969  	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6970  
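	/* rb_bufsz is log2 of the ring size in dwords; the IH_RB_SIZE field
	 * of IH_RB_CNTL starts at bit 1, hence the shift below.
	 */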
6971  	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6972  		      IH_WPTR_OVERFLOW_CLEAR |
6973  		      (rb_bufsz << 1));
6974  
6975  	if (rdev->wb.enabled)
6976  		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6977  
6978  	/* set the writeback address whether it's enabled or not */
6979  	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6980  	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6981  
6982  	WREG32(IH_RB_CNTL, ih_rb_cntl);
6983  
6984  	/* set rptr, wptr to 0 */
6985  	WREG32(IH_RB_RPTR, 0);
6986  	WREG32(IH_RB_WPTR, 0);
6987  
6988  	/* Default settings for IH_CNTL (disabled at first) */
6989  	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6990  	/* RPTR_REARM only works if msi's are enabled */
6991  	if (rdev->msi_enabled)
6992  		ih_cntl |= RPTR_REARM;
6993  	WREG32(IH_CNTL, ih_cntl);
6994  
6995  	/* force the active interrupt state to all disabled */
6996  	cik_disable_interrupt_state(rdev);
6997  
6998  	pci_set_master(rdev->pdev);
6999  
7000  	/* enable irqs */
7001  	cik_enable_interrupts(rdev);
7002  
7003  	return ret;
7004  }
7005  
7006  /**
7007   * cik_irq_set - enable/disable interrupt sources
7008   *
7009   * @rdev: radeon_device pointer
7010   *
7011   * Enable interrupt sources on the GPU (vblanks, hpd,
7012   * etc.) (CIK).
7013   * Returns 0 for success, errors for failure.
7014   */
7015  int cik_irq_set(struct radeon_device *rdev)
7016  {
7017  	u32 cp_int_cntl;
7018  	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7019  	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7020  	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7021  	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7022  	u32 grbm_int_cntl = 0;
7023  	u32 dma_cntl, dma_cntl1;
7024  
7025  	if (!rdev->irq.installed) {
7026  		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7027  		return -EINVAL;
7028  	}
7029  	/* don't enable anything if the ih is disabled */
7030  	if (!rdev->ih.enabled) {
7031  		cik_disable_interrupts(rdev);
7032  		/* force the active interrupt state to all disabled */
7033  		cik_disable_interrupt_state(rdev);
7034  		return 0;
7035  	}
7036  
7037  	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7038  		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7039  	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7040  
7041  	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7042  	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7043  	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7044  	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7045  	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7046  	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7047  
7048  	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7049  	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7050  
7051  	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7052  	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7053  	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7054  	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7055  	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7056  	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7057  	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7058  	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7059  
7060  	/* enable CP interrupts on all rings */
7061  	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7062  		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7063  		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7064  	}
7065  	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7066  		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7067  		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7068  		if (ring->me == 1) {
7069  			switch (ring->pipe) {
7070  			case 0:
7071  				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7072  				break;
7073  			case 1:
7074  				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7075  				break;
7076  			case 2:
7077  				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7078  				break;
7079  			case 3:
7080  				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7081  				break;
7082  			default:
7083  				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7084  				break;
7085  			}
7086  		} else if (ring->me == 2) {
7087  			switch (ring->pipe) {
7088  			case 0:
7089  				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7090  				break;
7091  			case 1:
7092  				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7093  				break;
7094  			case 2:
7095  				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7096  				break;
7097  			case 3:
7098  				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7099  				break;
7100  			default:
7101  				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7102  				break;
7103  			}
7104  		} else {
7105  			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7106  		}
7107  	}
7108  	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7109  		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7110  		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7111  		if (ring->me == 1) {
7112  			switch (ring->pipe) {
7113  			case 0:
7114  				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7115  				break;
7116  			case 1:
7117  				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7118  				break;
7119  			case 2:
7120  				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7121  				break;
7122  			case 3:
7123  				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7124  				break;
7125  			default:
7126  				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7127  				break;
7128  			}
7129  		} else if (ring->me == 2) {
7130  			switch (ring->pipe) {
7131  			case 0:
7132  				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7133  				break;
7134  			case 1:
7135  				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7136  				break;
7137  			case 2:
7138  				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7139  				break;
7140  			case 3:
7141  				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7142  				break;
7143  			default:
7144  				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7145  				break;
7146  			}
7147  		} else {
7148  			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7149  		}
7150  	}
7151  
7152  	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7153  		DRM_DEBUG("cik_irq_set: sw int dma\n");
7154  		dma_cntl |= TRAP_ENABLE;
7155  	}
7156  
7157  	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7158  		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7159  		dma_cntl1 |= TRAP_ENABLE;
7160  	}
7161  
7162  	if (rdev->irq.crtc_vblank_int[0] ||
7163  	    atomic_read(&rdev->irq.pflip[0])) {
7164  		DRM_DEBUG("cik_irq_set: vblank 0\n");
7165  		crtc1 |= VBLANK_INTERRUPT_MASK;
7166  	}
7167  	if (rdev->irq.crtc_vblank_int[1] ||
7168  	    atomic_read(&rdev->irq.pflip[1])) {
7169  		DRM_DEBUG("cik_irq_set: vblank 1\n");
7170  		crtc2 |= VBLANK_INTERRUPT_MASK;
7171  	}
7172  	if (rdev->irq.crtc_vblank_int[2] ||
7173  	    atomic_read(&rdev->irq.pflip[2])) {
7174  		DRM_DEBUG("cik_irq_set: vblank 2\n");
7175  		crtc3 |= VBLANK_INTERRUPT_MASK;
7176  	}
7177  	if (rdev->irq.crtc_vblank_int[3] ||
7178  	    atomic_read(&rdev->irq.pflip[3])) {
7179  		DRM_DEBUG("cik_irq_set: vblank 3\n");
7180  		crtc4 |= VBLANK_INTERRUPT_MASK;
7181  	}
7182  	if (rdev->irq.crtc_vblank_int[4] ||
7183  	    atomic_read(&rdev->irq.pflip[4])) {
7184  		DRM_DEBUG("cik_irq_set: vblank 4\n");
7185  		crtc5 |= VBLANK_INTERRUPT_MASK;
7186  	}
7187  	if (rdev->irq.crtc_vblank_int[5] ||
7188  	    atomic_read(&rdev->irq.pflip[5])) {
7189  		DRM_DEBUG("cik_irq_set: vblank 5\n");
7190  		crtc6 |= VBLANK_INTERRUPT_MASK;
7191  	}
7192  	if (rdev->irq.hpd[0]) {
7193  		DRM_DEBUG("cik_irq_set: hpd 1\n");
7194  		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7195  	}
7196  	if (rdev->irq.hpd[1]) {
7197  		DRM_DEBUG("cik_irq_set: hpd 2\n");
7198  		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7199  	}
7200  	if (rdev->irq.hpd[2]) {
7201  		DRM_DEBUG("cik_irq_set: hpd 3\n");
7202  		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7203  	}
7204  	if (rdev->irq.hpd[3]) {
7205  		DRM_DEBUG("cik_irq_set: hpd 4\n");
7206  		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7207  	}
7208  	if (rdev->irq.hpd[4]) {
7209  		DRM_DEBUG("cik_irq_set: hpd 5\n");
7210  		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7211  	}
7212  	if (rdev->irq.hpd[5]) {
7213  		DRM_DEBUG("cik_irq_set: hpd 6\n");
7214  		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7215  	}
7216  
7217  	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7218  
7219  	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7220  	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7221  
7222  	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7223  	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7224  	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7225  	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7226  	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7227  	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7228  	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7229  	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7230  
7231  	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7232  
7233  	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7234  	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7235  	if (rdev->num_crtc >= 4) {
7236  		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7237  		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7238  	}
7239  	if (rdev->num_crtc >= 6) {
7240  		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7241  		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7242  	}
7243  
7244  	if (rdev->num_crtc >= 2) {
7245  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7246  		       GRPH_PFLIP_INT_MASK);
7247  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7248  		       GRPH_PFLIP_INT_MASK);
7249  	}
7250  	if (rdev->num_crtc >= 4) {
7251  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7252  		       GRPH_PFLIP_INT_MASK);
7253  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7254  		       GRPH_PFLIP_INT_MASK);
7255  	}
7256  	if (rdev->num_crtc >= 6) {
7257  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7258  		       GRPH_PFLIP_INT_MASK);
7259  		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7260  		       GRPH_PFLIP_INT_MASK);
7261  	}
7262  
7263  	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7264  	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7265  	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7266  	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7267  	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7268  	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7269  
7270  	/* posting read */
7271  	RREG32(SRBM_STATUS);
7272  
7273  	return 0;
7274  }
7275  
7276  /**
7277   * cik_irq_ack - ack interrupt sources
7278   *
7279   * @rdev: radeon_device pointer
7280   *
7281   * Ack interrupt sources on the GPU (vblanks, hpd,
7282   * etc.) (CIK).  Certain interrupt sources are sw
7283   * generated and do not require an explicit ack.
7284   */
7285  static inline void cik_irq_ack(struct radeon_device *rdev)
7286  {
7287  	u32 tmp;
7288  
7289  	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7290  	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7291  	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7292  	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7293  	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7294  	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7295  	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7296  
7297  	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7298  		EVERGREEN_CRTC0_REGISTER_OFFSET);
7299  	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7300  		EVERGREEN_CRTC1_REGISTER_OFFSET);
7301  	if (rdev->num_crtc >= 4) {
7302  		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7303  			EVERGREEN_CRTC2_REGISTER_OFFSET);
7304  		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7305  			EVERGREEN_CRTC3_REGISTER_OFFSET);
7306  	}
7307  	if (rdev->num_crtc >= 6) {
7308  		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7309  			EVERGREEN_CRTC4_REGISTER_OFFSET);
7310  		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7311  			EVERGREEN_CRTC5_REGISTER_OFFSET);
7312  	}
7313  
7314  	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7315  		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7316  		       GRPH_PFLIP_INT_CLEAR);
7317  	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7318  		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7319  		       GRPH_PFLIP_INT_CLEAR);
7320  	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7321  		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7322  	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7323  		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7324  	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7325  		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7326  	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7327  		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7328  
7329  	if (rdev->num_crtc >= 4) {
7330  		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7331  			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7332  			       GRPH_PFLIP_INT_CLEAR);
7333  		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7334  			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7335  			       GRPH_PFLIP_INT_CLEAR);
7336  		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7337  			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7338  		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7339  			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7340  		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7341  			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7342  		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7343  			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7344  	}
7345  
7346  	if (rdev->num_crtc >= 6) {
7347  		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7348  			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7349  			       GRPH_PFLIP_INT_CLEAR);
7350  		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7351  			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7352  			       GRPH_PFLIP_INT_CLEAR);
7353  		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7354  			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7355  		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7356  			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7357  		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7358  			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7359  		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7360  			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7361  	}
7362  
7363  	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7364  		tmp = RREG32(DC_HPD1_INT_CONTROL);
7365  		tmp |= DC_HPDx_INT_ACK;
7366  		WREG32(DC_HPD1_INT_CONTROL, tmp);
7367  	}
7368  	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7369  		tmp = RREG32(DC_HPD2_INT_CONTROL);
7370  		tmp |= DC_HPDx_INT_ACK;
7371  		WREG32(DC_HPD2_INT_CONTROL, tmp);
7372  	}
7373  	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7374  		tmp = RREG32(DC_HPD3_INT_CONTROL);
7375  		tmp |= DC_HPDx_INT_ACK;
7376  		WREG32(DC_HPD3_INT_CONTROL, tmp);
7377  	}
7378  	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7379  		tmp = RREG32(DC_HPD4_INT_CONTROL);
7380  		tmp |= DC_HPDx_INT_ACK;
7381  		WREG32(DC_HPD4_INT_CONTROL, tmp);
7382  	}
7383  	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7384  		tmp = RREG32(DC_HPD5_INT_CONTROL);
7385  		tmp |= DC_HPDx_INT_ACK;
7386  		WREG32(DC_HPD5_INT_CONTROL, tmp);
7387  	}
7388  	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7389  		tmp = RREG32(DC_HPD6_INT_CONTROL);
7390  		tmp |= DC_HPDx_INT_ACK;
7391  		WREG32(DC_HPD6_INT_CONTROL, tmp);
7392  	}
7393  	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7394  		tmp = RREG32(DC_HPD1_INT_CONTROL);
7395  		tmp |= DC_HPDx_RX_INT_ACK;
7396  		WREG32(DC_HPD1_INT_CONTROL, tmp);
7397  	}
7398  	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7399  		tmp = RREG32(DC_HPD2_INT_CONTROL);
7400  		tmp |= DC_HPDx_RX_INT_ACK;
7401  		WREG32(DC_HPD2_INT_CONTROL, tmp);
7402  	}
7403  	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7404  		tmp = RREG32(DC_HPD3_INT_CONTROL);
7405  		tmp |= DC_HPDx_RX_INT_ACK;
7406  		WREG32(DC_HPD3_INT_CONTROL, tmp);
7407  	}
7408  	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7409  		tmp = RREG32(DC_HPD4_INT_CONTROL);
7410  		tmp |= DC_HPDx_RX_INT_ACK;
7411  		WREG32(DC_HPD4_INT_CONTROL, tmp);
7412  	}
7413  	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7414  		tmp = RREG32(DC_HPD5_INT_CONTROL);
7415  		tmp |= DC_HPDx_RX_INT_ACK;
7416  		WREG32(DC_HPD5_INT_CONTROL, tmp);
7417  	}
7418  	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7419  		tmp = RREG32(DC_HPD6_INT_CONTROL);
7420  		tmp |= DC_HPDx_RX_INT_ACK;
7421  		WREG32(DC_HPD6_INT_CONTROL, tmp);
7422  	}
7423  }
7424  
7425  /**
7426   * cik_irq_disable - disable interrupts
7427   *
7428   * @rdev: radeon_device pointer
7429   *
7430   * Disable interrupts on the hw (CIK).
7431   */
7432  static void cik_irq_disable(struct radeon_device *rdev)
7433  {
7434  	cik_disable_interrupts(rdev);
7435  	/* Wait and acknowledge irq */
7436  	mdelay(1);
7437  	cik_irq_ack(rdev);
7438  	cik_disable_interrupt_state(rdev);
7439  }
7440  
7441  /**
7442   * cik_irq_suspend - disable interrupts for suspend
7443   *
7444   * @rdev: radeon_device pointer
7445   *
7446   * Disable interrupts and stop the RLC (CIK).
7447   * Used for suspend.
7448   */
7449  static void cik_irq_suspend(struct radeon_device *rdev)
7450  {
7451  	cik_irq_disable(rdev);
7452  	cik_rlc_stop(rdev);
7453  }
7454  
7455  /**
7456   * cik_irq_fini - tear down interrupt support
7457   *
7458   * @rdev: radeon_device pointer
7459   *
7460   * Disable interrupts on the hw and free the IH ring
7461   * buffer (CIK).
7462   * Used for driver unload.
7463   */
7464  static void cik_irq_fini(struct radeon_device *rdev)
7465  {
7466  	cik_irq_suspend(rdev);
7467  	r600_ih_ring_fini(rdev);
7468  }
7469  
7470  /**
7471   * cik_get_ih_wptr - get the IH ring buffer wptr
7472   *
7473   * @rdev: radeon_device pointer
7474   *
7475   * Get the IH ring buffer wptr from either the register
7476   * or the writeback memory buffer (CIK).  Also check for
7477   * ring buffer overflow and deal with it.
7478   * Used by cik_irq_process().
7479   * Returns the value of the wptr.
7480   */
7481  static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7482  {
7483  	u32 wptr, tmp;
7484  
7485  	if (rdev->wb.enabled)
7486  		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7487  	else
7488  		wptr = RREG32(IH_RB_WPTR);
7489  
7490  	if (wptr & RB_OVERFLOW) {
7491  		wptr &= ~RB_OVERFLOW;
7492  		/* When a ring buffer overflow happens, start parsing interrupts
7493  		 * from the last vector that was not overwritten (wptr + 16).
7494  		 * Hopefully this should allow us to catch up.
7495  		 */
7496  		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7497  			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7498  		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7499  		tmp = RREG32(IH_RB_CNTL);
7500  		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7501  		WREG32(IH_RB_CNTL, tmp);
7502  	}
7503  	return (wptr & rdev->ih.ptr_mask);
7504  }
7505  
7506  /*        CIK IV Ring
7507   * Each IV ring entry is 128 bits:
7508   * [7:0]    - interrupt source id
7509   * [31:8]   - reserved
7510   * [59:32]  - interrupt source data
7511   * [63:60]  - reserved
7512   * [71:64]  - RINGID
7513   *            CP:
7514   *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7515   *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7516   *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7517   *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7518   *            PIPE_ID - ME0 0=3D
7519   *                    - ME1&2 compute dispatcher (4 pipes each)
7520   *            SDMA:
7521   *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7522   *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7523   *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7524   * [79:72]  - VMID
7525   * [95:80]  - PASID
7526   * [127:96] - reserved
7527   */
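/* Decoding one such entry from its four little-endian dwords (i is the
 * dword index of the entry), as the handler below does, with field names
 * per the layout above:
 *
 *	src_id   = le32_to_cpu(ring[i + 0]) & 0xff;
 *	src_data = le32_to_cpu(ring[i + 1]) & 0xfffffff;
 *	ring_id  = le32_to_cpu(ring[i + 2]) & 0xff;
 *	me_id    = (ring_id & 0x60) >> 5;
 *	pipe_id  = (ring_id & 0x18) >> 3;
 *	queue_id = (ring_id & 0x7) >> 0;
 *	vm_id    = (le32_to_cpu(ring[i + 2]) >> 8) & 0xff;
 *	pasid    = (le32_to_cpu(ring[i + 2]) >> 16) & 0xffff;
 */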
7528  /**
7529   * cik_irq_process - interrupt handler
7530   *
7531   * @rdev: radeon_device pointer
7532   *
7533   * Interrupt handler (CIK).  Walk the IH ring,
7534   * ack interrupts and schedule work to handle
7535   * interrupt events.
7536   * Returns irq process return code.
7537   */
7538  int cik_irq_process(struct radeon_device *rdev)
7539  {
7540  	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7541  	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7542  	u32 wptr;
7543  	u32 rptr;
7544  	u32 src_id, src_data, ring_id;
7545  	u8 me_id, pipe_id, queue_id;
7546  	u32 ring_index;
7547  	bool queue_hotplug = false;
7548  	bool queue_dp = false;
7549  	bool queue_reset = false;
7550  	u32 addr, status, mc_client;
7551  	bool queue_thermal = false;
7552  
7553  	if (!rdev->ih.enabled || rdev->shutdown)
7554  		return IRQ_NONE;
7555  
7556  	wptr = cik_get_ih_wptr(rdev);
7557  
7558  restart_ih:
7559  	/* is somebody else already processing irqs? */
7560  	if (atomic_xchg(&rdev->ih.lock, 1))
7561  		return IRQ_NONE;
7562  
7563  	rptr = rdev->ih.rptr;
7564  	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7565  
7566  	/* Order reading of wptr vs. reading of IH ring data */
7567  	rmb();
7568  
7569  	/* display interrupts */
7570  	cik_irq_ack(rdev);
7571  
7572  	while (rptr != wptr) {
7573  		/* wptr/rptr are in bytes! */
7574  		ring_index = rptr / 4;
7575  
7576  		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7577  		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7578  		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7579  
7580  		switch (src_id) {
7581  		case 1: /* D1 vblank/vline */
7582  			switch (src_data) {
7583  			case 0: /* D1 vblank */
7584  				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7585  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7586  
7587  				if (rdev->irq.crtc_vblank_int[0]) {
7588  					drm_handle_vblank(rdev_to_drm(rdev), 0);
7589  					rdev->pm.vblank_sync = true;
7590  					wake_up(&rdev->irq.vblank_queue);
7591  				}
7592  				if (atomic_read(&rdev->irq.pflip[0]))
7593  					radeon_crtc_handle_vblank(rdev, 0);
7594  				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7595  				DRM_DEBUG("IH: D1 vblank\n");
7596  
7597  				break;
7598  			case 1: /* D1 vline */
7599  				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7600  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601  
7602  				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7603  				DRM_DEBUG("IH: D1 vline\n");
7604  
7605  				break;
7606  			default:
7607  				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7608  				break;
7609  			}
7610  			break;
7611  		case 2: /* D2 vblank/vline */
7612  			switch (src_data) {
7613  			case 0: /* D2 vblank */
7614  				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7615  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616  
7617  				if (rdev->irq.crtc_vblank_int[1]) {
7618  					drm_handle_vblank(rdev_to_drm(rdev), 1);
7619  					rdev->pm.vblank_sync = true;
7620  					wake_up(&rdev->irq.vblank_queue);
7621  				}
7622  				if (atomic_read(&rdev->irq.pflip[1]))
7623  					radeon_crtc_handle_vblank(rdev, 1);
7624  				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7625  				DRM_DEBUG("IH: D2 vblank\n");
7626  
7627  				break;
7628  			case 1: /* D2 vline */
7629  				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7630  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631  
7632  				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7633  				DRM_DEBUG("IH: D2 vline\n");
7634  
7635  				break;
7636  			default:
7637  				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7638  				break;
7639  			}
7640  			break;
7641  		case 3: /* D3 vblank/vline */
7642  			switch (src_data) {
7643  			case 0: /* D3 vblank */
7644  				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7645  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646  
7647  				if (rdev->irq.crtc_vblank_int[2]) {
7648  					drm_handle_vblank(rdev_to_drm(rdev), 2);
7649  					rdev->pm.vblank_sync = true;
7650  					wake_up(&rdev->irq.vblank_queue);
7651  				}
7652  				if (atomic_read(&rdev->irq.pflip[2]))
7653  					radeon_crtc_handle_vblank(rdev, 2);
7654  				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7655  				DRM_DEBUG("IH: D3 vblank\n");
7656  
7657  				break;
7658  			case 1: /* D3 vline */
7659  				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7660  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661  
7662  				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7663  				DRM_DEBUG("IH: D3 vline\n");
7664  
7665  				break;
7666  			default:
7667  				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7668  				break;
7669  			}
7670  			break;
7671  		case 4: /* D4 vblank/vline */
7672  			switch (src_data) {
7673  			case 0: /* D4 vblank */
7674  				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7675  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676  
7677  				if (rdev->irq.crtc_vblank_int[3]) {
7678  					drm_handle_vblank(rdev_to_drm(rdev), 3);
7679  					rdev->pm.vblank_sync = true;
7680  					wake_up(&rdev->irq.vblank_queue);
7681  				}
7682  				if (atomic_read(&rdev->irq.pflip[3]))
7683  					radeon_crtc_handle_vblank(rdev, 3);
7684  				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7685  				DRM_DEBUG("IH: D4 vblank\n");
7686  
7687  				break;
7688  			case 1: /* D4 vline */
7689  				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7690  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691  
7692  				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7693  				DRM_DEBUG("IH: D4 vline\n");
7694  
7695  				break;
7696  			default:
7697  				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7698  				break;
7699  			}
7700  			break;
7701  		case 5: /* D5 vblank/vline */
7702  			switch (src_data) {
7703  			case 0: /* D5 vblank */
7704  				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7705  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706  
7707  				if (rdev->irq.crtc_vblank_int[4]) {
7708  					drm_handle_vblank(rdev_to_drm(rdev), 4);
7709  					rdev->pm.vblank_sync = true;
7710  					wake_up(&rdev->irq.vblank_queue);
7711  				}
7712  				if (atomic_read(&rdev->irq.pflip[4]))
7713  					radeon_crtc_handle_vblank(rdev, 4);
7714  				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7715  				DRM_DEBUG("IH: D5 vblank\n");
7716  
7717  				break;
7718  			case 1: /* D5 vline */
7719  				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7720  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721  
7722  				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7723  				DRM_DEBUG("IH: D5 vline\n");
7724  
7725  				break;
7726  			default:
7727  				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7728  				break;
7729  			}
7730  			break;
7731  		case 6: /* D6 vblank/vline */
7732  			switch (src_data) {
7733  			case 0: /* D6 vblank */
7734  				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7735  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736  
7737  				if (rdev->irq.crtc_vblank_int[5]) {
7738  					drm_handle_vblank(rdev_to_drm(rdev), 5);
7739  					rdev->pm.vblank_sync = true;
7740  					wake_up(&rdev->irq.vblank_queue);
7741  				}
7742  				if (atomic_read(&rdev->irq.pflip[5]))
7743  					radeon_crtc_handle_vblank(rdev, 5);
7744  				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7745  				DRM_DEBUG("IH: D6 vblank\n");
7746  
7747  				break;
7748  			case 1: /* D6 vline */
7749  				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7750  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751  
7752  				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7753  				DRM_DEBUG("IH: D6 vline\n");
7754  
7755  				break;
7756  			default:
7757  				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7758  				break;
7759  			}
7760  			break;
7761  		case 8: /* D1 page flip */
7762  		case 10: /* D2 page flip */
7763  		case 12: /* D3 page flip */
7764  		case 14: /* D4 page flip */
7765  		case 16: /* D5 page flip */
7766  		case 18: /* D6 page flip */
7767  			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7768  			if (radeon_use_pflipirq > 0)
7769  				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7770  			break;
7771  		case 42: /* HPD hotplug */
7772  			switch (src_data) {
7773  			case 0:
7774  				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7775  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7776  
7777  				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7778  				queue_hotplug = true;
7779  				DRM_DEBUG("IH: HPD1\n");
7780  
7781  				break;
7782  			case 1:
7783  				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7784  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7785  
7786  				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7787  				queue_hotplug = true;
7788  				DRM_DEBUG("IH: HPD2\n");
7789  
7790  				break;
7791  			case 2:
7792  				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7793  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7794  
7795  				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7796  				queue_hotplug = true;
7797  				DRM_DEBUG("IH: HPD3\n");
7798  
7799  				break;
7800  			case 3:
7801  				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7802  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803  
7804  				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7805  				queue_hotplug = true;
7806  				DRM_DEBUG("IH: HPD4\n");
7807  
7808  				break;
7809  			case 4:
7810  				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7811  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812  
7813  				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7814  				queue_hotplug = true;
7815  				DRM_DEBUG("IH: HPD5\n");
7816  
7817  				break;
7818  			case 5:
7819  				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7820  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821  
7822  				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7823  				queue_hotplug = true;
7824  				DRM_DEBUG("IH: HPD6\n");
7825  
7826  				break;
7827  			case 6:
7828  				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7829  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830  
7831  				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7832  				queue_dp = true;
7833  				DRM_DEBUG("IH: HPD_RX 1\n");
7834  
7835  				break;
7836  			case 7:
7837  				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7838  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839  
7840  				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7841  				queue_dp = true;
7842  				DRM_DEBUG("IH: HPD_RX 2\n");
7843  
7844  				break;
7845  			case 8:
7846  				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7847  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848  
7849  				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7850  				queue_dp = true;
7851  				DRM_DEBUG("IH: HPD_RX 3\n");
7852  
7853  				break;
7854  			case 9:
7855  				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7856  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857  
7858  				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7859  				queue_dp = true;
7860  				DRM_DEBUG("IH: HPD_RX 4\n");
7861  
7862  				break;
7863  			case 10:
7864  				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7865  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866  
7867  				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7868  				queue_dp = true;
7869  				DRM_DEBUG("IH: HPD_RX 5\n");
7870  
7871  				break;
7872  			case 11:
7873  				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7874  					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875  
7876  				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7877  				queue_dp = true;
7878  				DRM_DEBUG("IH: HPD_RX 6\n");
7879  
7880  				break;
7881  			default:
7882  				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7883  				break;
7884  			}
7885  			break;
7886  		case 96:
7887  			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7888  			WREG32(SRBM_INT_ACK, 0x1);
7889  			break;
7890  		case 124: /* UVD */
7891  			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7892  			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7893  			break;
7894  		case 146:
7895  		case 147:
7896  			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7897  			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7898  			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7899  			/* reset addr and status */
7900  			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7901  			if (addr == 0x0 && status == 0x0)
7902  				break;
7903  			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7904  			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7905  				addr);
7906  			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7907  				status);
7908  			cik_vm_decode_fault(rdev, status, addr, mc_client);
7909  			break;
7910  		case 167: /* VCE */
7911  			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7912  			switch (src_data) {
7913  			case 0:
7914  				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7915  				break;
7916  			case 1:
7917  				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7918  				break;
7919  			default:
7920  				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7921  				break;
7922  			}
7923  			break;
7924  		case 176: /* GFX RB CP_INT */
7925  		case 177: /* GFX IB CP_INT */
7926  			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7927  			break;
7928  		case 181: /* CP EOP event */
7929  			DRM_DEBUG("IH: CP EOP\n");
7930  			/* XXX check the bitfield order! */
7931  			me_id = (ring_id & 0x60) >> 5;
7932  			pipe_id = (ring_id & 0x18) >> 3;
7933  			queue_id = (ring_id & 0x7) >> 0;
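			/*
			 * As the masks above imply (inferred from this code, not
			 * from hardware documentation -- see the XXX above),
			 * ring_id appears to pack the queue selector as
			 * [6:5] = me, [4:3] = pipe, [2:0] = queue; e.g.
			 * ring_id = 0x29 decodes to me 1, pipe 1, queue 1.
			 */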
7934  			switch (me_id) {
7935  			case 0:
7936  				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7937  				break;
7938  			case 1:
7939  			case 2:
7940  				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7941  					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7942  				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7943  					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7944  				break;
7945  			}
7946  			break;
7947  		case 184: /* CP Privileged reg access */
7948  			DRM_ERROR("Illegal register access in command stream\n");
7949  			/* XXX check the bitfield order! */
7950  			me_id = (ring_id & 0x60) >> 5;
7951  			switch (me_id) {
7952  			case 0:
7953  				/* This results in a full GPU reset, but all we need to do is soft
7954  				 * reset the CP for gfx
7955  				 */
7956  				queue_reset = true;
7957  				break;
7958  			case 1:
7959  				/* XXX compute */
7960  				queue_reset = true;
7961  				break;
7962  			case 2:
7963  				/* XXX compute */
7964  				queue_reset = true;
7965  				break;
7966  			}
7967  			break;
7968  		case 185: /* CP Privileged inst */
7969  			DRM_ERROR("Illegal instruction in command stream\n");
7970  			/* XXX check the bitfield order! */
7971  			me_id = (ring_id & 0x60) >> 5;
7972  			switch (me_id) {
7973  			case 0:
7974  				/* This results in a full GPU reset, but all we need to do is soft
7975  				 * reset the CP for gfx
7976  				 */
7977  				queue_reset = true;
7978  				break;
7979  			case 1:
7980  				/* XXX compute */
7981  				queue_reset = true;
7982  				break;
7983  			case 2:
7984  				/* XXX compute */
7985  				queue_reset = true;
7986  				break;
7987  			}
7988  			break;
7989  		case 224: /* SDMA trap event */
7990  			/* XXX check the bitfield order! */
7991  			me_id = (ring_id & 0x3) >> 0;
7992  			queue_id = (ring_id & 0xc) >> 2;
7993  			DRM_DEBUG("IH: SDMA trap\n");
7994  			switch (me_id) {
7995  			case 0:
7996  				switch (queue_id) {
7997  				case 0:
7998  					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7999  					break;
8000  				case 1:
8001  					/* XXX compute */
8002  					break;
8003  				case 2:
8004  					/* XXX compute */
8005  					break;
8006  				}
8007  				break;
8008  			case 1:
8009  				switch (queue_id) {
8010  				case 0:
8011  					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8012  					break;
8013  				case 1:
8014  					/* XXX compute */
8015  					break;
8016  				case 2:
8017  					/* XXX compute */
8018  					break;
8019  				}
8020  				break;
8021  			}
8022  			break;
8023  		case 230: /* thermal low to high */
8024  			DRM_DEBUG("IH: thermal low to high\n");
8025  			rdev->pm.dpm.thermal.high_to_low = false;
8026  			queue_thermal = true;
8027  			break;
8028  		case 231: /* thermal high to low */
8029  			DRM_DEBUG("IH: thermal high to low\n");
8030  			rdev->pm.dpm.thermal.high_to_low = true;
8031  			queue_thermal = true;
8032  			break;
8033  		case 233: /* GUI IDLE */
8034  			DRM_DEBUG("IH: GUI idle\n");
8035  			break;
8036  		case 241: /* SDMA Privileged inst */
8037  		case 247: /* SDMA Privileged inst */
8038  			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8039  			/* XXX check the bitfield order! */
8040  			me_id = (ring_id & 0x3) >> 0;
8041  			queue_id = (ring_id & 0xc) >> 2;
8042  			switch (me_id) {
8043  			case 0:
8044  				switch (queue_id) {
8045  				case 0:
8046  					queue_reset = true;
8047  					break;
8048  				case 1:
8049  					/* XXX compute */
8050  					queue_reset = true;
8051  					break;
8052  				case 2:
8053  					/* XXX compute */
8054  					queue_reset = true;
8055  					break;
8056  				}
8057  				break;
8058  			case 1:
8059  				switch (queue_id) {
8060  				case 0:
8061  					queue_reset = true;
8062  					break;
8063  				case 1:
8064  					/* XXX compute */
8065  					queue_reset = true;
8066  					break;
8067  				case 2:
8068  					/* XXX compute */
8069  					queue_reset = true;
8070  					break;
8071  				}
8072  				break;
8073  			}
8074  			break;
8075  		default:
8076  			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8077  			break;
8078  		}
8079  
8080  		/* wptr/rptr are in bytes! */
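		/*
		 * Each IH vector is 16 bytes (four dwords), so the read
		 * pointer advances by a fixed 16; the AND with ptr_mask
		 * wraps it at the end of the ring buffer.
		 */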
8081  		rptr += 16;
8082  		rptr &= rdev->ih.ptr_mask;
8083  		WREG32(IH_RB_RPTR, rptr);
8084  	}
8085  	if (queue_dp)
8086  		schedule_work(&rdev->dp_work);
8087  	if (queue_hotplug)
8088  		schedule_delayed_work(&rdev->hotplug_work, 0);
8089  	if (queue_reset) {
8090  		rdev->needs_reset = true;
8091  		wake_up_all(&rdev->fence_queue);
8092  	}
8093  	if (queue_thermal)
8094  		schedule_work(&rdev->pm.dpm.thermal.work);
8095  	rdev->ih.rptr = rptr;
8096  	atomic_set(&rdev->ih.lock, 0);
8097  
8098  	/* make sure wptr hasn't changed while processing */
8099  	wptr = cik_get_ih_wptr(rdev);
8100  	if (wptr != rptr)
8101  		goto restart_ih;
8102  
8103  	return IRQ_HANDLED;
8104  }
8105  
8106  /*
8107   * startup/shutdown callbacks
8108   */
8109  static void cik_uvd_init(struct radeon_device *rdev)
8110  {
8111  	int r;
8112  
8113  	if (!rdev->has_uvd)
8114  		return;
8115  
8116  	r = radeon_uvd_init(rdev);
8117  	if (r) {
8118  		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8119  		/*
8120  		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
8121  		 * cik_uvd_start() fail early, so nothing happens there.
8122  		 * It is therefore pointless to go through that code path,
8123  		 * hence we disable uvd here.
8124  		 */
8125  		rdev->has_uvd = false;
8126  		return;
8127  	}
8128  	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8129  	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8130  }
8131  
8132  static void cik_uvd_start(struct radeon_device *rdev)
8133  {
8134  	int r;
8135  
8136  	if (!rdev->has_uvd)
8137  		return;
8138  
8139  	r = radeon_uvd_resume(rdev);
8140  	if (r) {
8141  		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8142  		goto error;
8143  	}
8144  	r = uvd_v4_2_resume(rdev);
8145  	if (r) {
8146  		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8147  		goto error;
8148  	}
8149  	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8150  	if (r) {
8151  		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8152  		goto error;
8153  	}
8154  	return;
8155  
8156  error:
8157  	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8158  }
8159  
8160  static void cik_uvd_resume(struct radeon_device *rdev)
8161  {
8162  	struct radeon_ring *ring;
8163  	int r;
8164  
8165  	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8166  		return;
8167  
8168  	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8169  	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8170  	if (r) {
8171  		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8172  		return;
8173  	}
8174  	r = uvd_v1_0_init(rdev);
8175  	if (r) {
8176  		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8177  		return;
8178  	}
8179  }
8180  
8181  static void cik_vce_init(struct radeon_device *rdev)
8182  {
8183  	int r;
8184  
8185  	if (!rdev->has_vce)
8186  		return;
8187  
8188  	r = radeon_vce_init(rdev);
8189  	if (r) {
8190  		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8191  		/*
8192  		 * At this point rdev->vce.vcpu_bo is NULL, which makes
8193  		 * cik_vce_start() fail early, so nothing happens there.
8194  		 * It is therefore pointless to go through that code path,
8195  		 * hence we disable vce here.
8196  		 */
8197  		rdev->has_vce = false;
8198  		return;
8199  	}
8200  	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8201  	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8202  	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8203  	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8204  }
8205  
8206  static void cik_vce_start(struct radeon_device *rdev)
8207  {
8208  	int r;
8209  
8210  	if (!rdev->has_vce)
8211  		return;
8212  
8213  	r = radeon_vce_resume(rdev);
8214  	if (r) {
8215  		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8216  		goto error;
8217  	}
8218  	r = vce_v2_0_resume(rdev);
8219  	if (r) {
8220  		dev_err(rdev->dev, "failed VCE 2.0 resume (%d).\n", r);
8221  		goto error;
8222  	}
8223  	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8224  	if (r) {
8225  		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8226  		goto error;
8227  	}
8228  	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8229  	if (r) {
8230  		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8231  		goto error;
8232  	}
8233  	return;
8234  
8235  error:
8236  	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8237  	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8238  }
8239  
8240  static void cik_vce_resume(struct radeon_device *rdev)
8241  {
8242  	struct radeon_ring *ring;
8243  	int r;
8244  
8245  	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8246  		return;
8247  
8248  	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8249  	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8250  	if (r) {
8251  		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8252  		return;
8253  	}
8254  	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8255  	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8256  	if (r) {
8257  		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
8258  		return;
8259  	}
8260  	r = vce_v1_0_init(rdev);
8261  	if (r) {
8262  		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8263  		return;
8264  	}
8265  }
8266  
8267  /**
8268   * cik_startup - program the asic to a functional state
8269   *
8270   * @rdev: radeon_device pointer
8271   *
8272   * Programs the asic to a functional state (CIK).
8273   * Called by cik_init() and cik_resume().
8274   * Returns 0 for success, error for failure.
8275   */
8276  static int cik_startup(struct radeon_device *rdev)
8277  {
8278  	struct radeon_ring *ring;
8279  	u32 nop;
8280  	int r;
8281  
8282  	/* enable pcie gen2/3 link */
8283  	cik_pcie_gen3_enable(rdev);
8284  	/* enable aspm */
8285  	cik_program_aspm(rdev);
8286  
8287  	/* scratch needs to be initialized before MC */
8288  	r = r600_vram_scratch_init(rdev);
8289  	if (r)
8290  		return r;
8291  
8292  	cik_mc_program(rdev);
8293  
8294  	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8295  		r = ci_mc_load_microcode(rdev);
8296  		if (r) {
8297  			DRM_ERROR("Failed to load MC firmware!\n");
8298  			return r;
8299  		}
8300  	}
8301  
8302  	r = cik_pcie_gart_enable(rdev);
8303  	if (r)
8304  		return r;
8305  	cik_gpu_init(rdev);
8306  
8307  	/* allocate rlc buffers */
8308  	if (rdev->flags & RADEON_IS_IGP) {
8309  		if (rdev->family == CHIP_KAVERI) {
8310  			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8311  			rdev->rlc.reg_list_size =
8312  				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8313  		} else {
8314  			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8315  			rdev->rlc.reg_list_size =
8316  				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8317  		}
8318  	}
8319  	rdev->rlc.cs_data = ci_cs_data;
8320  	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8321  	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8322  	r = sumo_rlc_init(rdev);
8323  	if (r) {
8324  		DRM_ERROR("Failed to init rlc BOs!\n");
8325  		return r;
8326  	}
8327  
8328  	/* allocate wb buffer */
8329  	r = radeon_wb_init(rdev);
8330  	if (r)
8331  		return r;
8332  
8333  	/* allocate mec buffers */
8334  	r = cik_mec_init(rdev);
8335  	if (r) {
8336  		DRM_ERROR("Failed to init MEC BOs!\n");
8337  		return r;
8338  	}
8339  
8340  	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8341  	if (r) {
8342  		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8343  		return r;
8344  	}
8345  
8346  	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8347  	if (r) {
8348  		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8349  		return r;
8350  	}
8351  
8352  	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8353  	if (r) {
8354  		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8355  		return r;
8356  	}
8357  
8358  	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8359  	if (r) {
8360  		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8361  		return r;
8362  	}
8363  
8364  	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8365  	if (r) {
8366  		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8367  		return r;
8368  	}
8369  
8370  	cik_uvd_start(rdev);
8371  	cik_vce_start(rdev);
8372  
8373  	/* Enable IRQ */
8374  	if (!rdev->irq.installed) {
8375  		r = radeon_irq_kms_init(rdev);
8376  		if (r)
8377  			return r;
8378  	}
8379  
8380  	r = cik_irq_init(rdev);
8381  	if (r) {
8382  		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8383  		radeon_irq_kms_fini(rdev);
8384  		return r;
8385  	}
8386  	cik_irq_set(rdev);
8387  
8388  	if (rdev->family == CHIP_HAWAII) {
8389  		if (rdev->new_fw)
8390  			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8391  		else
8392  			nop = RADEON_CP_PACKET2;
8393  	} else {
8394  		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8395  	}
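
	/*
	 * Sketch of the two nop encodings chosen above (from the radeon
	 * packet definitions as generally understood, not restated from
	 * this file): RADEON_CP_PACKET2 is a single-dword type-2 filler
	 * (header 0x80000000), while PACKET3(PACKET3_NOP, 0x3FFF) builds
	 * a type-3 header, roughly (3 << 30) | (opcode << 8) | count,
	 * which newer CP microcode expects for ring padding.
	 */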
8396  
8397  	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8398  	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8399  			     nop);
8400  	if (r)
8401  		return r;
8402  
8403  	/* set up the compute queues */
8404  	/* type-2 packets are deprecated on MEC, use type-3 instead */
8405  	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8406  	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8407  			     nop);
8408  	if (r)
8409  		return r;
8410  	ring->me = 1; /* first MEC */
8411  	ring->pipe = 0; /* first pipe */
8412  	ring->queue = 0; /* first queue */
8413  	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8414  
8415  	/* type-2 packets are deprecated on MEC, use type-3 instead */
8416  	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8417  	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8418  			     nop);
8419  	if (r)
8420  		return r;
8421  	/* dGPUs only have 1 MEC */
8422  	ring->me = 1; /* first MEC */
8423  	ring->pipe = 0; /* first pipe */
8424  	ring->queue = 1; /* second queue */
8425  	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8426  
8427  	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8428  	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8429  			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8430  	if (r)
8431  		return r;
8432  
8433  	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8434  	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8435  			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8436  	if (r)
8437  		return r;
8438  
8439  	r = cik_cp_resume(rdev);
8440  	if (r)
8441  		return r;
8442  
8443  	r = cik_sdma_resume(rdev);
8444  	if (r)
8445  		return r;
8446  
8447  	cik_uvd_resume(rdev);
8448  	cik_vce_resume(rdev);
8449  
8450  	r = radeon_ib_pool_init(rdev);
8451  	if (r) {
8452  		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8453  		return r;
8454  	}
8455  
8456  	r = radeon_vm_manager_init(rdev);
8457  	if (r) {
8458  		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8459  		return r;
8460  	}
8461  
8462  	r = radeon_audio_init(rdev);
8463  	if (r)
8464  		return r;
8465  
8466  	return 0;
8467  }
8468  
8469  /**
8470   * cik_resume - resume the asic to a functional state
8471   *
8472   * @rdev: radeon_device pointer
8473   *
8474   * Programs the asic to a functional state (CIK).
8475   * Called at resume.
8476   * Returns 0 for success, error for failure.
8477   */
8478  int cik_resume(struct radeon_device *rdev)
8479  {
8480  	int r;
8481  
8482  	/* post card */
8483  	atom_asic_init(rdev->mode_info.atom_context);
8484  
8485  	/* init golden registers */
8486  	cik_init_golden_registers(rdev);
8487  
8488  	if (rdev->pm.pm_method == PM_METHOD_DPM)
8489  		radeon_pm_resume(rdev);
8490  
8491  	rdev->accel_working = true;
8492  	r = cik_startup(rdev);
8493  	if (r) {
8494  		DRM_ERROR("cik startup failed on resume\n");
8495  		rdev->accel_working = false;
8496  		return r;
8497  	}
8498  
8499  	return r;
8501  }
8502  
8503  /**
8504   * cik_suspend - suspend the asic
8505   *
8506   * @rdev: radeon_device pointer
8507   *
8508   * Bring the chip into a state suitable for suspend (CIK).
8509   * Called at suspend.
8510   * Returns 0 for success.
8511   */
8512  int cik_suspend(struct radeon_device *rdev)
8513  {
8514  	radeon_pm_suspend(rdev);
8515  	radeon_audio_fini(rdev);
8516  	radeon_vm_manager_fini(rdev);
8517  	cik_cp_enable(rdev, false);
8518  	cik_sdma_enable(rdev, false);
8519  	if (rdev->has_uvd) {
8520  		radeon_uvd_suspend(rdev);
8521  		uvd_v1_0_fini(rdev);
8522  	}
8523  	if (rdev->has_vce)
8524  		radeon_vce_suspend(rdev);
8525  	cik_fini_pg(rdev);
8526  	cik_fini_cg(rdev);
8527  	cik_irq_suspend(rdev);
8528  	radeon_wb_disable(rdev);
8529  	cik_pcie_gart_disable(rdev);
8530  	return 0;
8531  }
8532  
8533  /* The plan is to move initialization into this function and to use
8534   * helper functions so that radeon_device_init does pretty much
8535   * nothing more than call asic-specific functions. This should
8536   * also allow us to remove a bunch of callback functions
8537   * like vram_info.
8538   */
8539  /**
8540   * cik_init - asic specific driver and hw init
8541   *
8542   * @rdev: radeon_device pointer
8543   *
8544   * Setup asic specific driver variables and program the hw
8545   * to a functional state (CIK).
8546   * Called at driver startup.
8547   * Returns 0 for success, errors for failure.
8548   */
8549  int cik_init(struct radeon_device *rdev)
8550  {
8551  	struct radeon_ring *ring;
8552  	int r;
8553  
8554  	/* Read BIOS */
8555  	if (!radeon_get_bios(rdev)) {
8556  		if (ASIC_IS_AVIVO(rdev))
8557  			return -EINVAL;
8558  	}
8559  	/* Must be an ATOMBIOS */
8560  	if (!rdev->is_atom_bios) {
8561  		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8562  		return -EINVAL;
8563  	}
8564  	r = radeon_atombios_init(rdev);
8565  	if (r)
8566  		return r;
8567  
8568  	/* Post card if necessary */
8569  	if (!radeon_card_posted(rdev)) {
8570  		if (!rdev->bios) {
8571  			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8572  			return -EINVAL;
8573  		}
8574  		DRM_INFO("GPU not posted. posting now...\n");
8575  		atom_asic_init(rdev->mode_info.atom_context);
8576  	}
8577  	/* init golden registers */
8578  	cik_init_golden_registers(rdev);
8579  	/* Initialize scratch registers */
8580  	cik_scratch_init(rdev);
8581  	/* Initialize surface registers */
8582  	radeon_surface_init(rdev);
8583  	/* Initialize clocks */
8584  	radeon_get_clock_info(rdev_to_drm(rdev));
8585  
8586  	/* Fence driver */
8587  	radeon_fence_driver_init(rdev);
8588  
8589  	/* initialize memory controller */
8590  	r = cik_mc_init(rdev);
8591  	if (r)
8592  		return r;
8593  	/* Memory manager */
8594  	r = radeon_bo_init(rdev);
8595  	if (r)
8596  		return r;
8597  
8598  	if (rdev->flags & RADEON_IS_IGP) {
8599  		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8600  		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8601  			r = cik_init_microcode(rdev);
8602  			if (r) {
8603  				DRM_ERROR("Failed to load firmware!\n");
8604  				return r;
8605  			}
8606  		}
8607  	} else {
8608  		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8609  		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8610  		    !rdev->mc_fw) {
8611  			r = cik_init_microcode(rdev);
8612  			if (r) {
8613  				DRM_ERROR("Failed to load firmware!\n");
8614  				return r;
8615  			}
8616  		}
8617  	}
8618  
8619  	/* Initialize power management */
8620  	radeon_pm_init(rdev);
8621  
8622  	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8623  	ring->ring_obj = NULL;
8624  	r600_ring_init(rdev, ring, 1024 * 1024);
8625  
8626  	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8627  	ring->ring_obj = NULL;
8628  	r600_ring_init(rdev, ring, 1024 * 1024);
8629  	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8630  	if (r)
8631  		return r;
8632  
8633  	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8634  	ring->ring_obj = NULL;
8635  	r600_ring_init(rdev, ring, 1024 * 1024);
8636  	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8637  	if (r)
8638  		return r;
8639  
8640  	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8641  	ring->ring_obj = NULL;
8642  	r600_ring_init(rdev, ring, 256 * 1024);
8643  
8644  	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8645  	ring->ring_obj = NULL;
8646  	r600_ring_init(rdev, ring, 256 * 1024);
8647  
8648  	cik_uvd_init(rdev);
8649  	cik_vce_init(rdev);
8650  
8651  	rdev->ih.ring_obj = NULL;
8652  	r600_ih_ring_init(rdev, 64 * 1024);
8653  
8654  	r = r600_pcie_gart_init(rdev);
8655  	if (r)
8656  		return r;
8657  
8658  	rdev->accel_working = true;
8659  	r = cik_startup(rdev);
8660  	if (r) {
8661  		dev_err(rdev->dev, "disabling GPU acceleration\n");
8662  		cik_cp_fini(rdev);
8663  		cik_sdma_fini(rdev);
8664  		cik_irq_fini(rdev);
8665  		sumo_rlc_fini(rdev);
8666  		cik_mec_fini(rdev);
8667  		radeon_wb_fini(rdev);
8668  		radeon_ib_pool_fini(rdev);
8669  		radeon_vm_manager_fini(rdev);
8670  		radeon_irq_kms_fini(rdev);
8671  		cik_pcie_gart_fini(rdev);
8672  		rdev->accel_working = false;
8673  	}
8674  
8675  	/* Don't start up if the MC ucode is missing.
8676  	 * The default clocks and voltages before the MC ucode
8677  	 * is loaded are not sufficient for advanced operations.
8678  	 */
8679  	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8680  		DRM_ERROR("radeon: MC ucode required for CIK parts.\n");
8681  		return -EINVAL;
8682  	}
8683  
8684  	return 0;
8685  }
8686  
8687  /**
8688   * cik_fini - asic specific driver and hw fini
8689   *
8690   * @rdev: radeon_device pointer
8691   *
8692   * Tear down the asic specific driver variables and program the hw
8693   * to an idle state (CIK).
8694   * Called at driver unload.
8695   */
8696  void cik_fini(struct radeon_device *rdev)
8697  {
8698  	radeon_pm_fini(rdev);
8699  	cik_cp_fini(rdev);
8700  	cik_sdma_fini(rdev);
8701  	cik_fini_pg(rdev);
8702  	cik_fini_cg(rdev);
8703  	cik_irq_fini(rdev);
8704  	sumo_rlc_fini(rdev);
8705  	cik_mec_fini(rdev);
8706  	radeon_wb_fini(rdev);
8707  	radeon_vm_manager_fini(rdev);
8708  	radeon_ib_pool_fini(rdev);
8709  	radeon_irq_kms_fini(rdev);
8710  	uvd_v1_0_fini(rdev);
8711  	radeon_uvd_fini(rdev);
8712  	radeon_vce_fini(rdev);
8713  	cik_pcie_gart_fini(rdev);
8714  	r600_vram_scratch_fini(rdev);
8715  	radeon_gem_fini(rdev);
8716  	radeon_fence_driver_fini(rdev);
8717  	radeon_bo_fini(rdev);
8718  	radeon_atombios_fini(rdev);
8719  	kfree(rdev->bios);
8720  	rdev->bios = NULL;
8721  }
8722  
8723  void dce8_program_fmt(struct drm_encoder *encoder)
8724  {
8725  	struct drm_device *dev = encoder->dev;
8726  	struct radeon_device *rdev = dev->dev_private;
8727  	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8728  	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8729  	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8730  	int bpc = 0;
8731  	u32 tmp = 0;
8732  	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8733  
8734  	if (connector) {
8735  		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8736  		bpc = radeon_get_monitor_bpc(connector);
8737  		dither = radeon_connector->dither;
8738  	}
8739  
8740  	/* LVDS/eDP FMT is set up by atom */
8741  	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8742  		return;
8743  
8744  	/* not needed for analog */
8745  	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8746  	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8747  		return;
8748  
8749  	if (bpc == 0)
8750  		return;
8751  
8752  	switch (bpc) {
8753  	case 6:
8754  		if (dither == RADEON_FMT_DITHER_ENABLE)
8755  			/* XXX sort out optimal dither settings */
8756  			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8757  				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8758  		else
8759  			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8760  		break;
8761  	case 8:
8762  		if (dither == RADEON_FMT_DITHER_ENABLE)
8763  			/* XXX sort out optimal dither settings */
8764  			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8765  				FMT_RGB_RANDOM_ENABLE |
8766  				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8767  		else
8768  			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8769  		break;
8770  	case 10:
8771  		if (dither == RADEON_FMT_DITHER_ENABLE)
8772  			/* XXX sort out optimal dither settings */
8773  			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8774  				FMT_RGB_RANDOM_ENABLE |
8775  				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8776  		else
8777  			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8778  		break;
8779  	default:
8780  		/* not needed */
8781  		break;
8782  	}
8783  
8784  	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8785  }
8786  
8787  /* display watermark setup */
8788  /**
8789   * dce8_line_buffer_adjust - Set up the line buffer
8790   *
8791   * @rdev: radeon_device pointer
8792   * @radeon_crtc: the selected display controller
8793   * @mode: the current display mode on the selected display
8794   * controller
8795   *
8796   * Set up the line buffer allocation for
8797   * the selected display controller (CIK).
8798   * Returns the line buffer size in pixels.
8799   */
8800  static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8801  				   struct radeon_crtc *radeon_crtc,
8802  				   struct drm_display_mode *mode)
8803  {
8804  	u32 tmp, buffer_alloc, i;
8805  	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8806  	/*
8807  	 * Line Buffer Setup
8808  	 * There are 6 line buffers, one for each display controller.
8809  	 * There are 3 partitions per LB. Select the number of partitions
8810  	 * to enable based on the display width.  For display widths larger
8811  	 * than 4096, you need to use 2 display controllers and combine
8812  	 * them using the stereo blender.
8813  	 */
8814  	if (radeon_crtc->base.enabled && mode) {
8815  		if (mode->crtc_hdisplay < 1920) {
8816  			tmp = 1;
8817  			buffer_alloc = 2;
8818  		} else if (mode->crtc_hdisplay < 2560) {
8819  			tmp = 2;
8820  			buffer_alloc = 2;
8821  		} else if (mode->crtc_hdisplay < 4096) {
8822  			tmp = 0;
8823  			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8824  		} else {
8825  			DRM_DEBUG_KMS("Mode too big for LB!\n");
8826  			tmp = 0;
8827  			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8828  		}
8829  	} else {
8830  		tmp = 1;
8831  		buffer_alloc = 0;
8832  	}
8833  
8834  	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8835  	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8836  
8837  	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8838  	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8839  	for (i = 0; i < rdev->usec_timeout; i++) {
8840  		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8841  		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8842  			break;
8843  		udelay(1);
8844  	}
8845  
8846  	if (radeon_crtc->base.enabled && mode) {
8847  		switch (tmp) {
8848  		case 0:
8849  		default:
8850  			return 4096 * 2;
8851  		case 1:
8852  			return 1920 * 2;
8853  		case 2:
8854  			return 2560 * 2;
8855  		}
8856  	}
8857  
8858  	/* controller not enabled, so no lb used */
8859  	return 0;
8860  }
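
/*
 * Worked example for the code above: for an enabled 1920x1080 mode,
 * crtc_hdisplay == 1920 lands in the "< 2560" branch, so tmp = 2 and
 * buffer_alloc = 2, and the function reports 2560 * 2 = 5120 pixels of
 * line buffer for that pipe.
 */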
8861  
8862  /**
8863   * cik_get_number_of_dram_channels - get the number of dram channels
8864   *
8865   * @rdev: radeon_device pointer
8866   *
8867   * Look up the number of video ram channels (CIK).
8868   * Used for display watermark bandwidth calculations
8869   * Returns the number of dram channels
8870   */
8871  static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8872  {
8873  	u32 tmp = RREG32(MC_SHARED_CHMAP);
8874  
8875  	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8876  	case 0:
8877  	default:
8878  		return 1;
8879  	case 1:
8880  		return 2;
8881  	case 2:
8882  		return 4;
8883  	case 3:
8884  		return 8;
8885  	case 4:
8886  		return 3;
8887  	case 5:
8888  		return 6;
8889  	case 6:
8890  		return 10;
8891  	case 7:
8892  		return 12;
8893  	case 8:
8894  		return 16;
8895  	}
8896  }
8897  
8898  struct dce8_wm_params {
8899  	u32 dram_channels; /* number of dram channels */
8900  	u32 yclk;          /* bandwidth per dram data pin in kHz */
8901  	u32 sclk;          /* engine clock in kHz */
8902  	u32 disp_clk;      /* display clock in kHz */
8903  	u32 src_width;     /* viewport width */
8904  	u32 active_time;   /* active display time in ns */
8905  	u32 blank_time;    /* blank time in ns */
8906  	bool interlaced;    /* mode is interlaced */
8907  	fixed20_12 vsc;    /* vertical scale ratio */
8908  	u32 num_heads;     /* number of active crtcs */
8909  	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8910  	u32 lb_size;       /* line buffer allocated to pipe */
8911  	u32 vtaps;         /* vertical scaler taps */
8912  };
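
/*
 * The helpers below do their arithmetic in the DRM fixed20_12 type
 * (20.12 fixed point, <drm/drm_fixed.h>). A minimal sketch of the
 * idiom, assuming only dfixed_const(), dfixed_div() and dfixed_trunc():
 *
 *	fixed20_12 eff, ten;
 *
 *	eff.full = dfixed_const(7);
 *	ten.full = dfixed_const(10);
 *	eff.full = dfixed_div(eff, ten);  (eff now represents 0.7)
 *
 * Truncating at this point would give 0, which is why the helpers
 * multiply first and call dfixed_trunc() only on the final product.
 */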
8913  
8914  /**
8915   * dce8_dram_bandwidth - get the dram bandwidth
8916   *
8917   * @wm: watermark calculation data
8918   *
8919   * Calculate the raw dram bandwidth (CIK).
8920   * Used for display watermark bandwidth calculations
8921   * Returns the dram bandwidth in MBytes/s
8922   */
8923  static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8924  {
8925  	/* Calculate raw DRAM Bandwidth */
8926  	fixed20_12 dram_efficiency; /* 0.7 */
8927  	fixed20_12 yclk, dram_channels, bandwidth;
8928  	fixed20_12 a;
8929  
8930  	a.full = dfixed_const(1000);
8931  	yclk.full = dfixed_const(wm->yclk);
8932  	yclk.full = dfixed_div(yclk, a);
8933  	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8934  	a.full = dfixed_const(10);
8935  	dram_efficiency.full = dfixed_const(7);
8936  	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8937  	bandwidth.full = dfixed_mul(dram_channels, yclk);
8938  	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8939  
8940  	return dfixed_trunc(bandwidth);
8941  }
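
/*
 * Worked example: yclk = 1000000 kHz (1 GHz effective) and 4 DRAM
 * channels give 1000 * (4 * 4) * 0.7 = 11200 MBytes/s of raw DRAM
 * bandwidth. (Illustrative figures, not measured values.)
 */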
8942  
8943  /**
8944   * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8945   *
8946   * @wm: watermark calculation data
8947   *
8948   * Calculate the dram bandwidth used for display (CIK).
8949   * Used for display watermark bandwidth calculations
8950   * Returns the dram bandwidth for display in MBytes/s
8951   */
8952  static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8953  {
8954  	/* Calculate DRAM Bandwidth and the part allocated to display. */
8955  	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8956  	fixed20_12 yclk, dram_channels, bandwidth;
8957  	fixed20_12 a;
8958  
8959  	a.full = dfixed_const(1000);
8960  	yclk.full = dfixed_const(wm->yclk);
8961  	yclk.full = dfixed_div(yclk, a);
8962  	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8963  	a.full = dfixed_const(10);
8964  	disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
8965  	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8966  	bandwidth.full = dfixed_mul(dram_channels, yclk);
8967  	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8968  
8969  	return dfixed_trunc(bandwidth);
8970  }
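
/*
 * Same math as dce8_dram_bandwidth() but scaled by the 0.3 worst-case
 * display allocation: the illustrative 1 GHz / 4-channel example above
 * becomes 1000 * 16 * 0.3 = 4800 MBytes/s available to display.
 */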
8971  
8972  /**
8973   * dce8_data_return_bandwidth - get the data return bandwidth
8974   *
8975   * @wm: watermark calculation data
8976   *
8977   * Calculate the data return bandwidth used for display (CIK).
8978   * Used for display watermark bandwidth calculations
8979   * Returns the data return bandwidth in MBytes/s
8980   */
8981  static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8982  {
8983  	/* Calculate the display Data return Bandwidth */
8984  	fixed20_12 return_efficiency; /* 0.8 */
8985  	fixed20_12 sclk, bandwidth;
8986  	fixed20_12 a;
8987  
8988  	a.full = dfixed_const(1000);
8989  	sclk.full = dfixed_const(wm->sclk);
8990  	sclk.full = dfixed_div(sclk, a);
8991  	a.full = dfixed_const(10);
8992  	return_efficiency.full = dfixed_const(8);
8993  	return_efficiency.full = dfixed_div(return_efficiency, a);
8994  	a.full = dfixed_const(32);
8995  	bandwidth.full = dfixed_mul(a, sclk);
8996  	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8997  
8998  	return dfixed_trunc(bandwidth);
8999  }
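
/*
 * Worked example: sclk = 800000 kHz (800 MHz) yields
 * 32 * 800 * 0.8 = 20480 MBytes/s of data return bandwidth.
 * (Illustrative figure only.)
 */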
9000  
9001  /**
9002   * dce8_dmif_request_bandwidth - get the dmif bandwidth
9003   *
9004   * @wm: watermark calculation data
9005   *
9006   * Calculate the dmif bandwidth used for display (CIK).
9007   * Used for display watermark bandwidth calculations
9008   * Returns the dmif bandwidth in MBytes/s
9009   */
9010  static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9011  {
9012  	/* Calculate the DMIF Request Bandwidth */
9013  	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9014  	fixed20_12 disp_clk, bandwidth;
9015  	fixed20_12 a, b;
9016  
9017  	a.full = dfixed_const(1000);
9018  	disp_clk.full = dfixed_const(wm->disp_clk);
9019  	disp_clk.full = dfixed_div(disp_clk, a);
9020  	a.full = dfixed_const(32);
9021  	b.full = dfixed_mul(a, disp_clk);
9022  
9023  	a.full = dfixed_const(10);
9024  	disp_clk_request_efficiency.full = dfixed_const(8);
9025  	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9026  
9027  	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9028  
9029  	return dfixed_trunc(bandwidth);
9030  }
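
/*
 * Worked example: a 600000 kHz (600 MHz) display clock yields
 * 32 * 600 * 0.8 = 15360 MBytes/s of DMIF request bandwidth.
 * (Illustrative figure only.)
 */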
9031  
9032  /**
9033   * dce8_available_bandwidth - get the min available bandwidth
9034   *
9035   * @wm: watermark calculation data
9036   *
9037   * Calculate the min available bandwidth used for display (CIK).
9038   * Used for display watermark bandwidth calculations
9039   * Returns the min available bandwidth in MBytes/s
9040   */
9041  static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9042  {
9043  	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9044  	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9045  	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9046  	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9047  
9048  	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9049  }
9050  
9051  /**
9052   * dce8_average_bandwidth - get the average available bandwidth
9053   *
9054   * @wm: watermark calculation data
9055   *
9056   * Calculate the average available bandwidth used for display (CIK).
9057   * Used for display watermark bandwidth calculations
9058   * Returns the average available bandwidth in MBytes/s
9059   */
9060  static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9061  {
9062  	/* Calculate the display mode Average Bandwidth
9063  	 * DisplayMode should contain the source and destination dimensions,
9064  	 * timing, etc.
9065  	 */
9066  	fixed20_12 bpp;
9067  	fixed20_12 line_time;
9068  	fixed20_12 src_width;
9069  	fixed20_12 bandwidth;
9070  	fixed20_12 a;
9071  
9072  	a.full = dfixed_const(1000);
9073  	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9074  	line_time.full = dfixed_div(line_time, a);
9075  	bpp.full = dfixed_const(wm->bytes_per_pixel);
9076  	src_width.full = dfixed_const(wm->src_width);
9077  	bandwidth.full = dfixed_mul(src_width, bpp);
9078  	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9079  	bandwidth.full = dfixed_div(bandwidth, line_time);
9080  
9081  	return dfixed_trunc(bandwidth);
9082  }
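
/*
 * Worked example: a 1920-pixel-wide source at 4 bytes per pixel,
 * vsc = 1.0 and a ~14815 ns line time (1080p at 148.5 MHz) averages
 * 1920 * 4 / 14.815 us, roughly 518 MBytes/s. (Illustrative figures.)
 */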
9083  
9084  /**
9085   * dce8_latency_watermark - get the latency watermark
9086   *
9087   * @wm: watermark calculation data
9088   *
9089   * Calculate the latency watermark (CIK).
9090   * Used for display watermark bandwidth calculations
9091   * Returns the latency watermark in ns
9092   */
9093  static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9094  {
9095  	/* First calculate the latency in ns */
9096  	u32 mc_latency = 2000; /* 2000 ns. */
9097  	u32 available_bandwidth = dce8_available_bandwidth(wm);
9098  	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9099  	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9100  	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9101  	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9102  		(wm->num_heads * cursor_line_pair_return_time);
9103  	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9104  	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9105  	u32 tmp, dmif_size = 12288;
9106  	fixed20_12 a, b, c;
9107  
9108  	if (wm->num_heads == 0)
9109  		return 0;
9110  
9111  	a.full = dfixed_const(2);
9112  	b.full = dfixed_const(1);
9113  	if ((wm->vsc.full > a.full) ||
9114  	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9115  	    (wm->vtaps >= 5) ||
9116  	    ((wm->vsc.full >= a.full) && wm->interlaced))
9117  		max_src_lines_per_dst_line = 4;
9118  	else
9119  		max_src_lines_per_dst_line = 2;
9120  
9121  	a.full = dfixed_const(available_bandwidth);
9122  	b.full = dfixed_const(wm->num_heads);
9123  	a.full = dfixed_div(a, b);
9124  	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9125  	tmp = min(dfixed_trunc(a), tmp);
9126  
9127  	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9128  
9129  	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9130  	b.full = dfixed_const(1000);
9131  	c.full = dfixed_const(lb_fill_bw);
9132  	b.full = dfixed_div(c, b);
9133  	a.full = dfixed_div(a, b);
9134  	line_fill_time = dfixed_trunc(a);
9135  
9136  	if (line_fill_time < wm->active_time)
9137  		return latency;
9138  	else
9139  		return latency + (line_fill_time - wm->active_time);
9141  }
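
/*
 * Worked example using the illustrative figures above: with
 * 20480 MBytes/s available, a worst-case chunk return takes
 * 512 * 8 * 1000 / 20480 = 200 ns and a cursor line pair 25 ns; for
 * two heads that adds (3 * 200) + (2 * 25) = 650 ns on top of the
 * 2000 ns MC latency, plus 40000000 / 600000 = ~67 ns of dc pipe
 * latency, i.e. a watermark of roughly 2717 ns before any line fill
 * time correction is applied.
 */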
9142  
9143  /**
9144   * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9145   * average and available dram bandwidth
9146   *
9147   * @wm: watermark calculation data
9148   *
9149   * Check if the display average bandwidth fits in the display
9150   * dram bandwidth (CIK).
9151   * Used for display watermark bandwidth calculations
9152   * Returns true if the display fits, false if not.
9153   */
9154  static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9155  {
9156  	if (dce8_average_bandwidth(wm) <=
9157  	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9158  		return true;
9159  	else
9160  		return false;
9161  }
9162  
9163  /**
9164   * dce8_average_bandwidth_vs_available_bandwidth - check
9165   * average and available bandwidth
9166   *
9167   * @wm: watermark calculation data
9168   *
9169   * Check if the display average bandwidth fits in the display
9170   * available bandwidth (CIK).
9171   * Used for display watermark bandwidth calculations
9172   * Returns true if the display fits, false if not.
9173   */
9174  static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9175  {
9176  	if (dce8_average_bandwidth(wm) <=
9177  	    (dce8_available_bandwidth(wm) / wm->num_heads))
9178  		return true;
9179  	else
9180  		return false;
9181  }
9182  
9183  /**
9184   * dce8_check_latency_hiding - check latency hiding
9185   *
9186   * @wm: watermark calculation data
9187   *
9188   * Check latency hiding (CIK).
9189   * Used for display watermark bandwidth calculations
9190   * Returns true if the display fits, false if not.
9191   */
9192  static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9193  {
9194  	u32 lb_partitions = wm->lb_size / wm->src_width;
9195  	u32 line_time = wm->active_time + wm->blank_time;
9196  	u32 latency_tolerant_lines;
9197  	u32 latency_hiding;
9198  	fixed20_12 a;
9199  
9200  	a.full = dfixed_const(1);
9201  	if (wm->vsc.full > a.full)
9202  		latency_tolerant_lines = 1;
9203  	else {
9204  		if (lb_partitions <= (wm->vtaps + 1))
9205  			latency_tolerant_lines = 1;
9206  		else
9207  			latency_tolerant_lines = 2;
9208  	}
9209  
9210  	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9211  
9212  	if (dce8_latency_watermark(wm) <= latency_hiding)
9213  		return true;
9214  	else
9215  		return false;
9216  }
9217  
9218  /**
9219   * dce8_program_watermarks - program display watermarks
9220   *
9221   * @rdev: radeon_device pointer
9222   * @radeon_crtc: the selected display controller
9223   * @lb_size: line buffer size
9224   * @num_heads: number of display controllers in use
9225   *
9226   * Calculate and program the display watermarks for the
9227   * selected display controller (CIK).
9228   */
9229  static void dce8_program_watermarks(struct radeon_device *rdev,
9230  				    struct radeon_crtc *radeon_crtc,
9231  				    u32 lb_size, u32 num_heads)
9232  {
9233  	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9234  	struct dce8_wm_params wm_low, wm_high;
9235  	u32 active_time;
9236  	u32 line_time = 0;
9237  	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9238  	u32 tmp, wm_mask;
9239  
9240  	if (radeon_crtc->base.enabled && num_heads && mode) {
9241  		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9242  					    (u32)mode->clock);
9243  		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9244  					  (u32)mode->clock);
9245  		line_time = min(line_time, (u32)65535);
9246  
9247  		/* watermark for high clocks */
9248  		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9249  		    rdev->pm.dpm_enabled) {
9250  			wm_high.yclk =
9251  				radeon_dpm_get_mclk(rdev, false) * 10;
9252  			wm_high.sclk =
9253  				radeon_dpm_get_sclk(rdev, false) * 10;
9254  		} else {
9255  			wm_high.yclk = rdev->pm.current_mclk * 10;
9256  			wm_high.sclk = rdev->pm.current_sclk * 10;
9257  		}
9258  
9259  		wm_high.disp_clk = mode->clock;
9260  		wm_high.src_width = mode->crtc_hdisplay;
9261  		wm_high.active_time = active_time;
9262  		wm_high.blank_time = line_time - wm_high.active_time;
9263  		wm_high.interlaced = false;
9264  		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9265  			wm_high.interlaced = true;
9266  		wm_high.vsc = radeon_crtc->vsc;
9267  		wm_high.vtaps = 1;
9268  		if (radeon_crtc->rmx_type != RMX_OFF)
9269  			wm_high.vtaps = 2;
9270  		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9271  		wm_high.lb_size = lb_size;
9272  		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9273  		wm_high.num_heads = num_heads;
9274  
9275  		/* set for high clocks */
9276  		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9277  
9278  		/* possibly force display priority to high */
9279  		/* should really do this at mode validation time... */
9280  		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9281  		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9282  		    !dce8_check_latency_hiding(&wm_high) ||
9283  		    (rdev->disp_priority == 2)) {
9284  			DRM_DEBUG_KMS("force priority to high\n");
9285  		}
9286  
9287  		/* watermark for low clocks */
9288  		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9289  		    rdev->pm.dpm_enabled) {
9290  			wm_low.yclk =
9291  				radeon_dpm_get_mclk(rdev, true) * 10;
9292  			wm_low.sclk =
9293  				radeon_dpm_get_sclk(rdev, true) * 10;
9294  		} else {
9295  			wm_low.yclk = rdev->pm.current_mclk * 10;
9296  			wm_low.sclk = rdev->pm.current_sclk * 10;
9297  		}
9298  
9299  		wm_low.disp_clk = mode->clock;
9300  		wm_low.src_width = mode->crtc_hdisplay;
9301  		wm_low.active_time = active_time;
9302  		wm_low.blank_time = line_time - wm_low.active_time;
9303  		wm_low.interlaced = false;
9304  		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9305  			wm_low.interlaced = true;
9306  		wm_low.vsc = radeon_crtc->vsc;
9307  		wm_low.vtaps = 1;
9308  		if (radeon_crtc->rmx_type != RMX_OFF)
9309  			wm_low.vtaps = 2;
9310  		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9311  		wm_low.lb_size = lb_size;
9312  		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9313  		wm_low.num_heads = num_heads;
9314  
9315  		/* set for low clocks */
9316  		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9317  
9318  		/* possibly force display priority to high */
9319  		/* should really do this at mode validation time... */
9320  		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9321  		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9322  		    !dce8_check_latency_hiding(&wm_low) ||
9323  		    (rdev->disp_priority == 2)) {
9324  			DRM_DEBUG_KMS("force priority to high\n");
9325  		}
9326  
9327  		/* Save number of lines the linebuffer leads before the scanout */
9328  		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9329  	}
9330  
9331  	/* select wm A */
9332  	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9333  	tmp = wm_mask;
9334  	tmp &= ~LATENCY_WATERMARK_MASK(3);
9335  	tmp |= LATENCY_WATERMARK_MASK(1);
9336  	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9337  	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9338  	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9339  		LATENCY_HIGH_WATERMARK(line_time)));
9340  	/* select wm B */
9341  	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9342  	tmp &= ~LATENCY_WATERMARK_MASK(3);
9343  	tmp |= LATENCY_WATERMARK_MASK(2);
9344  	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9345  	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9346  	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9347  		LATENCY_HIGH_WATERMARK(line_time)));
9348  	/* restore original selection */
9349  	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9350  
9351  	/* save values for DPM */
9352  	radeon_crtc->line_time = line_time;
9353  	radeon_crtc->wm_high = latency_watermark_a;
9354  	radeon_crtc->wm_low = latency_watermark_b;
9355  }
9356  
9357  /**
9358   * dce8_bandwidth_update - program display watermarks
9359   *
9360   * @rdev: radeon_device pointer
9361   *
9362   * Calculate and program the display watermarks and line
9363   * buffer allocation (CIK).
9364   */
9365  void dce8_bandwidth_update(struct radeon_device *rdev)
9366  {
9367  	struct drm_display_mode *mode = NULL;
9368  	u32 num_heads = 0, lb_size;
9369  	int i;
9370  
9371  	if (!rdev->mode_info.mode_config_initialized)
9372  		return;
9373  
9374  	radeon_update_display_priority(rdev);
9375  
9376  	for (i = 0; i < rdev->num_crtc; i++) {
9377  		if (rdev->mode_info.crtcs[i]->base.enabled)
9378  			num_heads++;
9379  	}
9380  	for (i = 0; i < rdev->num_crtc; i++) {
9381  		mode = &rdev->mode_info.crtcs[i]->base.mode;
9382  		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9383  		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9384  	}
9385  }
9386  
9387  /**
9388   * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9389   *
9390   * @rdev: radeon_device pointer
9391   *
9392   * Fetches a GPU clock counter snapshot (CIK).
9393   * Returns the 64 bit clock counter snapshot.
9394   */
9395  uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9396  {
9397  	uint64_t clock;
9398  
9399  	mutex_lock(&rdev->gpu_clock_mutex);
9400  	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9401  	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9402  		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9403  	mutex_unlock(&rdev->gpu_clock_mutex);
9404  	return clock;
9405  }
9406  
9407  static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9408  			     u32 cntl_reg, u32 status_reg)
9409  {
9410  	int r, i;
9411  	struct atom_clock_dividers dividers;
9412  	uint32_t tmp;
9413  
9414  	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9415  					   clock, false, &dividers);
9416  	if (r)
9417  		return r;
9418  
9419  	tmp = RREG32_SMC(cntl_reg);
9420  	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9421  	tmp |= dividers.post_divider;
9422  	WREG32_SMC(cntl_reg, tmp);
9423  
9424  	for (i = 0; i < 100; i++) {
9425  		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9426  			break;
9427  		mdelay(10);
9428  	}
9429  	if (i == 100)
9430  		return -ETIMEDOUT;
9431  
9432  	return 0;
9433  }
9434  
9435  int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9436  {
9437  	int r = 0;
9438  
9439  	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9440  	if (r)
9441  		return r;
9442  
9443  	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9444  	return r;
9445  }
9446  
9447  int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9448  {
9449  	int r, i;
9450  	struct atom_clock_dividers dividers;
9451  	u32 tmp;
9452  
9453  	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9454  					   ecclk, false, &dividers);
9455  	if (r)
9456  		return r;
9457  
9458  	for (i = 0; i < 100; i++) {
9459  		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9460  			break;
9461  		mdelay(10);
9462  	}
9463  	if (i == 100)
9464  		return -ETIMEDOUT;
9465  
9466  	tmp = RREG32_SMC(CG_ECLK_CNTL);
9467  	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9468  	tmp |= dividers.post_divider;
9469  	WREG32_SMC(CG_ECLK_CNTL, tmp);
9470  
9471  	for (i = 0; i < 100; i++) {
9472  		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9473  			break;
9474  		mdelay(10);
9475  	}
9476  	if (i == 100)
9477  		return -ETIMEDOUT;
9478  
9479  	return 0;
9480  }
9481  
9482  static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9483  {
9484  	struct pci_dev *root = rdev->pdev->bus->self;
9485  	enum pci_bus_speed speed_cap;
9486  	u32 speed_cntl, current_data_rate;
9487  	int i;
9488  	u16 tmp16;
9489  
9490  	if (pci_is_root_bus(rdev->pdev->bus))
9491  		return;
9492  
9493  	if (radeon_pcie_gen2 == 0)
9494  		return;
9495  
9496  	if (rdev->flags & RADEON_IS_IGP)
9497  		return;
9498  
9499  	if (!(rdev->flags & RADEON_IS_PCIE))
9500  		return;
9501  
9502  	speed_cap = pcie_get_speed_cap(root);
9503  	if (speed_cap == PCI_SPEED_UNKNOWN)
9504  		return;
9505  
9506  	if ((speed_cap != PCIE_SPEED_8_0GT) &&
9507  	    (speed_cap != PCIE_SPEED_5_0GT))
9508  		return;
9509  
9510  	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9511  	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9512  		LC_CURRENT_DATA_RATE_SHIFT;
9513  	if (speed_cap == PCIE_SPEED_8_0GT) {
9514  		if (current_data_rate == 2) {
9515  			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9516  			return;
9517  		}
9518  		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9519  	} else if (speed_cap == PCIE_SPEED_5_0GT) {
9520  		if (current_data_rate == 1) {
9521  			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9522  			return;
9523  		}
9524  		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9525  	}
9526  
9527  	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
9528  		return;
9529  
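	/*
	 * The retry loop below saves the link control state of both the
	 * bridge and the GPU, forces the link into the quiesce state and
	 * asks the port to redo equalization (LC_REDO_EQ), then restores
	 * the saved LNKCTL/LNKCTL2 state. It stops early if the device
	 * reports a transaction pending (PCI_EXP_DEVSTA_TRPND).
	 */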
	if (speed_cap == PCIE_SPEED_8_0GT) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
			pcie_capability_set_word(rdev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_DEVSTA,
							  &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &bridge_cfg);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &gpu_cfg);

				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &bridge_cfg2);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				msleep(100);

				/* linkctl */
				pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
								   PCI_EXP_LNKCTL_HAWD,
								   bridge_cfg &
								   PCI_EXP_LNKCTL_HAWD);
				pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL,
								   PCI_EXP_LNKCTL_HAWD,
								   gpu_cfg &
								   PCI_EXP_LNKCTL_HAWD);

				/* linkctl2 */
				pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
								   PCI_EXP_LNKCTL2_ENTER_COMP |
								   PCI_EXP_LNKCTL2_TX_MARGIN,
								   bridge_cfg2 |
								   (PCI_EXP_LNKCTL2_ENTER_COMP |
								    PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL2,
								   PCI_EXP_LNKCTL2_ENTER_COMP |
								   PCI_EXP_LNKCTL2_TX_MARGIN,
								   gpu_cfg2 |
								   (PCI_EXP_LNKCTL2_ENTER_COMP |
								    PCI_EXP_LNKCTL2_TX_MARGIN));

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

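	/*
	 * With equalization done (or skipped), force a software-driven
	 * speed change: select the target link speed in LNKCTL2, kick
	 * LC_INITIATE_LINK_SPEED_CHANGE, and poll until the hardware
	 * clears the bit to signal the change has been processed.
	 */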
	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	tmp16 = 0;
	if (speed_cap == PCIE_SPEED_8_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
	else
		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
	pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL2,
					   PCI_EXP_LNKCTL2_TLS, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}

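/**
 * cik_program_aspm - configure PCIE ASPM (CIK)
 *
 * @rdev: radeon_device pointer
 *
 * Sets up L0s/L1 Active State Power Management: inactivity timers,
 * PLL power-down while the link is in L1, and the CLKREQ#-based
 * clocking scheme when the upstream bridge advertises clock power
 * management. Skipped for IGPs, non-PCIE parts, and when ASPM is
 * disabled via the radeon_aspm module parameter.
 */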
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* program the L0s/L1 inactivity timers; PMI-triggered L1 entry
	 * stays disabled unless L1 is actually enabled below */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF PLLs to power down while the link is in L1 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# is only treated as usable if the upstream
			 * bridge advertises clock power management */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* select alternate clock sources for the thermal
				 * monitors, deep sleep clock, and zclk */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}