xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision fcbd8037f7df694aa7bfb7ce82c0c7f5e53e7b7b)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 
25 #include <linux/firmware.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 
29 #include <drm/drm_pci.h>
30 #include <drm/drm_vblank.h>
31 
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "cikd.h"
35 #include "clearstate_ci.h"
36 #include "radeon.h"
37 #include "radeon_asic.h"
38 #include "radeon_audio.h"
39 #include "radeon_ucode.h"
40 
41 #define SH_MEM_CONFIG_GFX_DEFAULT \
42 	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
43 
44 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
53 
54 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
55 MODULE_FIRMWARE("radeon/bonaire_me.bin");
56 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
57 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
58 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
59 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
60 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
61 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
62 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
63 
64 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
69 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
70 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
71 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
72 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
73 
74 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
75 MODULE_FIRMWARE("radeon/hawaii_me.bin");
76 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
77 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
78 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
79 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
80 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
81 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
82 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
83 
84 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
86 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
87 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
88 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
89 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
90 
91 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
92 MODULE_FIRMWARE("radeon/kaveri_me.bin");
93 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
94 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
95 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
96 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
97 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
98 
99 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
100 MODULE_FIRMWARE("radeon/KABINI_me.bin");
101 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
102 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
103 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
104 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
105 
106 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
107 MODULE_FIRMWARE("radeon/kabini_me.bin");
108 MODULE_FIRMWARE("radeon/kabini_ce.bin");
109 MODULE_FIRMWARE("radeon/kabini_mec.bin");
110 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
111 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
112 
113 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
115 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
116 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
117 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
118 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
119 
120 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
121 MODULE_FIRMWARE("radeon/mullins_me.bin");
122 MODULE_FIRMWARE("radeon/mullins_ce.bin");
123 MODULE_FIRMWARE("radeon/mullins_mec.bin");
124 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
125 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
126 
127 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
128 extern void r600_ih_ring_fini(struct radeon_device *rdev);
129 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
130 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
131 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
132 extern void sumo_rlc_fini(struct radeon_device *rdev);
133 extern int sumo_rlc_init(struct radeon_device *rdev);
134 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
135 extern void si_rlc_reset(struct radeon_device *rdev);
136 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
137 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
138 extern int cik_sdma_resume(struct radeon_device *rdev);
139 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
140 extern void cik_sdma_fini(struct radeon_device *rdev);
141 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
142 static void cik_rlc_stop(struct radeon_device *rdev);
143 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
144 static void cik_program_aspm(struct radeon_device *rdev);
145 static void cik_init_pg(struct radeon_device *rdev);
146 static void cik_init_cg(struct radeon_device *rdev);
147 static void cik_fini_pg(struct radeon_device *rdev);
148 static void cik_fini_cg(struct radeon_device *rdev);
149 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
150 					  bool enable);
151 
152 /**
153  * cik_get_allowed_info_register - fetch the register for the info ioctl
154  *
155  * @rdev: radeon_device pointer
156  * @reg: register offset in bytes
157  * @val: register value
158  *
159  * Returns 0 for success or -EINVAL for an invalid register
160  *
161  */
162 int cik_get_allowed_info_register(struct radeon_device *rdev,
163 				  u32 reg, u32 *val)
164 {
165 	switch (reg) {
166 	case GRBM_STATUS:
167 	case GRBM_STATUS2:
168 	case GRBM_STATUS_SE0:
169 	case GRBM_STATUS_SE1:
170 	case GRBM_STATUS_SE2:
171 	case GRBM_STATUS_SE3:
172 	case SRBM_STATUS:
173 	case SRBM_STATUS2:
174 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
175 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
176 	case UVD_STATUS:
177 	/* TODO VCE */
178 		*val = RREG32(reg);
179 		return 0;
180 	default:
181 		return -EINVAL;
182 	}
183 }
184 
185 /*
186  * Indirect registers accessor
187  */
188 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
189 {
190 	unsigned long flags;
191 	u32 r;
192 
193 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
194 	WREG32(CIK_DIDT_IND_INDEX, (reg));
195 	r = RREG32(CIK_DIDT_IND_DATA);
196 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
197 	return r;
198 }
199 
200 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
201 {
202 	unsigned long flags;
203 
204 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
205 	WREG32(CIK_DIDT_IND_INDEX, (reg));
206 	WREG32(CIK_DIDT_IND_DATA, (v));
207 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
208 }
209 
210 /* get temperature in millidegrees */
211 int ci_get_temp(struct radeon_device *rdev)
212 {
213 	u32 temp;
214 	int actual_temp = 0;
215 
216 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
217 		CTF_TEMP_SHIFT;
218 
219 	if (temp & 0x200)
220 		actual_temp = 255;
221 	else
222 		actual_temp = temp & 0x1ff;
223 
224 	actual_temp = actual_temp * 1000;
225 
226 	return actual_temp;
227 }
228 
229 /* get temperature in millidegrees */
230 int kv_get_temp(struct radeon_device *rdev)
231 {
232 	u32 temp;
233 	int actual_temp = 0;
234 
235 	temp = RREG32_SMC(0xC0300E0C);
236 
237 	if (temp)
238 		actual_temp = (temp / 8) - 49;
239 	else
240 		actual_temp = 0;
241 
242 	actual_temp = actual_temp * 1000;
243 
244 	return actual_temp;
245 }
246 
247 /*
248  * Indirect registers accessor
249  */
250 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
251 {
252 	unsigned long flags;
253 	u32 r;
254 
255 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
256 	WREG32(PCIE_INDEX, reg);
257 	(void)RREG32(PCIE_INDEX);
258 	r = RREG32(PCIE_DATA);
259 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
260 	return r;
261 }
262 
263 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
264 {
265 	unsigned long flags;
266 
267 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
268 	WREG32(PCIE_INDEX, reg);
269 	(void)RREG32(PCIE_INDEX);
270 	WREG32(PCIE_DATA, v);
271 	(void)RREG32(PCIE_DATA);
272 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
273 }
274 
275 static const u32 spectre_rlc_save_restore_register_list[] =
276 {
277 	(0x0e00 << 16) | (0xc12c >> 2),
278 	0x00000000,
279 	(0x0e00 << 16) | (0xc140 >> 2),
280 	0x00000000,
281 	(0x0e00 << 16) | (0xc150 >> 2),
282 	0x00000000,
283 	(0x0e00 << 16) | (0xc15c >> 2),
284 	0x00000000,
285 	(0x0e00 << 16) | (0xc168 >> 2),
286 	0x00000000,
287 	(0x0e00 << 16) | (0xc170 >> 2),
288 	0x00000000,
289 	(0x0e00 << 16) | (0xc178 >> 2),
290 	0x00000000,
291 	(0x0e00 << 16) | (0xc204 >> 2),
292 	0x00000000,
293 	(0x0e00 << 16) | (0xc2b4 >> 2),
294 	0x00000000,
295 	(0x0e00 << 16) | (0xc2b8 >> 2),
296 	0x00000000,
297 	(0x0e00 << 16) | (0xc2bc >> 2),
298 	0x00000000,
299 	(0x0e00 << 16) | (0xc2c0 >> 2),
300 	0x00000000,
301 	(0x0e00 << 16) | (0x8228 >> 2),
302 	0x00000000,
303 	(0x0e00 << 16) | (0x829c >> 2),
304 	0x00000000,
305 	(0x0e00 << 16) | (0x869c >> 2),
306 	0x00000000,
307 	(0x0600 << 16) | (0x98f4 >> 2),
308 	0x00000000,
309 	(0x0e00 << 16) | (0x98f8 >> 2),
310 	0x00000000,
311 	(0x0e00 << 16) | (0x9900 >> 2),
312 	0x00000000,
313 	(0x0e00 << 16) | (0xc260 >> 2),
314 	0x00000000,
315 	(0x0e00 << 16) | (0x90e8 >> 2),
316 	0x00000000,
317 	(0x0e00 << 16) | (0x3c000 >> 2),
318 	0x00000000,
319 	(0x0e00 << 16) | (0x3c00c >> 2),
320 	0x00000000,
321 	(0x0e00 << 16) | (0x8c1c >> 2),
322 	0x00000000,
323 	(0x0e00 << 16) | (0x9700 >> 2),
324 	0x00000000,
325 	(0x0e00 << 16) | (0xcd20 >> 2),
326 	0x00000000,
327 	(0x4e00 << 16) | (0xcd20 >> 2),
328 	0x00000000,
329 	(0x5e00 << 16) | (0xcd20 >> 2),
330 	0x00000000,
331 	(0x6e00 << 16) | (0xcd20 >> 2),
332 	0x00000000,
333 	(0x7e00 << 16) | (0xcd20 >> 2),
334 	0x00000000,
335 	(0x8e00 << 16) | (0xcd20 >> 2),
336 	0x00000000,
337 	(0x9e00 << 16) | (0xcd20 >> 2),
338 	0x00000000,
339 	(0xae00 << 16) | (0xcd20 >> 2),
340 	0x00000000,
341 	(0xbe00 << 16) | (0xcd20 >> 2),
342 	0x00000000,
343 	(0x0e00 << 16) | (0x89bc >> 2),
344 	0x00000000,
345 	(0x0e00 << 16) | (0x8900 >> 2),
346 	0x00000000,
347 	0x3,
348 	(0x0e00 << 16) | (0xc130 >> 2),
349 	0x00000000,
350 	(0x0e00 << 16) | (0xc134 >> 2),
351 	0x00000000,
352 	(0x0e00 << 16) | (0xc1fc >> 2),
353 	0x00000000,
354 	(0x0e00 << 16) | (0xc208 >> 2),
355 	0x00000000,
356 	(0x0e00 << 16) | (0xc264 >> 2),
357 	0x00000000,
358 	(0x0e00 << 16) | (0xc268 >> 2),
359 	0x00000000,
360 	(0x0e00 << 16) | (0xc26c >> 2),
361 	0x00000000,
362 	(0x0e00 << 16) | (0xc270 >> 2),
363 	0x00000000,
364 	(0x0e00 << 16) | (0xc274 >> 2),
365 	0x00000000,
366 	(0x0e00 << 16) | (0xc278 >> 2),
367 	0x00000000,
368 	(0x0e00 << 16) | (0xc27c >> 2),
369 	0x00000000,
370 	(0x0e00 << 16) | (0xc280 >> 2),
371 	0x00000000,
372 	(0x0e00 << 16) | (0xc284 >> 2),
373 	0x00000000,
374 	(0x0e00 << 16) | (0xc288 >> 2),
375 	0x00000000,
376 	(0x0e00 << 16) | (0xc28c >> 2),
377 	0x00000000,
378 	(0x0e00 << 16) | (0xc290 >> 2),
379 	0x00000000,
380 	(0x0e00 << 16) | (0xc294 >> 2),
381 	0x00000000,
382 	(0x0e00 << 16) | (0xc298 >> 2),
383 	0x00000000,
384 	(0x0e00 << 16) | (0xc29c >> 2),
385 	0x00000000,
386 	(0x0e00 << 16) | (0xc2a0 >> 2),
387 	0x00000000,
388 	(0x0e00 << 16) | (0xc2a4 >> 2),
389 	0x00000000,
390 	(0x0e00 << 16) | (0xc2a8 >> 2),
391 	0x00000000,
392 	(0x0e00 << 16) | (0xc2ac  >> 2),
393 	0x00000000,
394 	(0x0e00 << 16) | (0xc2b0 >> 2),
395 	0x00000000,
396 	(0x0e00 << 16) | (0x301d0 >> 2),
397 	0x00000000,
398 	(0x0e00 << 16) | (0x30238 >> 2),
399 	0x00000000,
400 	(0x0e00 << 16) | (0x30250 >> 2),
401 	0x00000000,
402 	(0x0e00 << 16) | (0x30254 >> 2),
403 	0x00000000,
404 	(0x0e00 << 16) | (0x30258 >> 2),
405 	0x00000000,
406 	(0x0e00 << 16) | (0x3025c >> 2),
407 	0x00000000,
408 	(0x4e00 << 16) | (0xc900 >> 2),
409 	0x00000000,
410 	(0x5e00 << 16) | (0xc900 >> 2),
411 	0x00000000,
412 	(0x6e00 << 16) | (0xc900 >> 2),
413 	0x00000000,
414 	(0x7e00 << 16) | (0xc900 >> 2),
415 	0x00000000,
416 	(0x8e00 << 16) | (0xc900 >> 2),
417 	0x00000000,
418 	(0x9e00 << 16) | (0xc900 >> 2),
419 	0x00000000,
420 	(0xae00 << 16) | (0xc900 >> 2),
421 	0x00000000,
422 	(0xbe00 << 16) | (0xc900 >> 2),
423 	0x00000000,
424 	(0x4e00 << 16) | (0xc904 >> 2),
425 	0x00000000,
426 	(0x5e00 << 16) | (0xc904 >> 2),
427 	0x00000000,
428 	(0x6e00 << 16) | (0xc904 >> 2),
429 	0x00000000,
430 	(0x7e00 << 16) | (0xc904 >> 2),
431 	0x00000000,
432 	(0x8e00 << 16) | (0xc904 >> 2),
433 	0x00000000,
434 	(0x9e00 << 16) | (0xc904 >> 2),
435 	0x00000000,
436 	(0xae00 << 16) | (0xc904 >> 2),
437 	0x00000000,
438 	(0xbe00 << 16) | (0xc904 >> 2),
439 	0x00000000,
440 	(0x4e00 << 16) | (0xc908 >> 2),
441 	0x00000000,
442 	(0x5e00 << 16) | (0xc908 >> 2),
443 	0x00000000,
444 	(0x6e00 << 16) | (0xc908 >> 2),
445 	0x00000000,
446 	(0x7e00 << 16) | (0xc908 >> 2),
447 	0x00000000,
448 	(0x8e00 << 16) | (0xc908 >> 2),
449 	0x00000000,
450 	(0x9e00 << 16) | (0xc908 >> 2),
451 	0x00000000,
452 	(0xae00 << 16) | (0xc908 >> 2),
453 	0x00000000,
454 	(0xbe00 << 16) | (0xc908 >> 2),
455 	0x00000000,
456 	(0x4e00 << 16) | (0xc90c >> 2),
457 	0x00000000,
458 	(0x5e00 << 16) | (0xc90c >> 2),
459 	0x00000000,
460 	(0x6e00 << 16) | (0xc90c >> 2),
461 	0x00000000,
462 	(0x7e00 << 16) | (0xc90c >> 2),
463 	0x00000000,
464 	(0x8e00 << 16) | (0xc90c >> 2),
465 	0x00000000,
466 	(0x9e00 << 16) | (0xc90c >> 2),
467 	0x00000000,
468 	(0xae00 << 16) | (0xc90c >> 2),
469 	0x00000000,
470 	(0xbe00 << 16) | (0xc90c >> 2),
471 	0x00000000,
472 	(0x4e00 << 16) | (0xc910 >> 2),
473 	0x00000000,
474 	(0x5e00 << 16) | (0xc910 >> 2),
475 	0x00000000,
476 	(0x6e00 << 16) | (0xc910 >> 2),
477 	0x00000000,
478 	(0x7e00 << 16) | (0xc910 >> 2),
479 	0x00000000,
480 	(0x8e00 << 16) | (0xc910 >> 2),
481 	0x00000000,
482 	(0x9e00 << 16) | (0xc910 >> 2),
483 	0x00000000,
484 	(0xae00 << 16) | (0xc910 >> 2),
485 	0x00000000,
486 	(0xbe00 << 16) | (0xc910 >> 2),
487 	0x00000000,
488 	(0x0e00 << 16) | (0xc99c >> 2),
489 	0x00000000,
490 	(0x0e00 << 16) | (0x9834 >> 2),
491 	0x00000000,
492 	(0x0000 << 16) | (0x30f00 >> 2),
493 	0x00000000,
494 	(0x0001 << 16) | (0x30f00 >> 2),
495 	0x00000000,
496 	(0x0000 << 16) | (0x30f04 >> 2),
497 	0x00000000,
498 	(0x0001 << 16) | (0x30f04 >> 2),
499 	0x00000000,
500 	(0x0000 << 16) | (0x30f08 >> 2),
501 	0x00000000,
502 	(0x0001 << 16) | (0x30f08 >> 2),
503 	0x00000000,
504 	(0x0000 << 16) | (0x30f0c >> 2),
505 	0x00000000,
506 	(0x0001 << 16) | (0x30f0c >> 2),
507 	0x00000000,
508 	(0x0600 << 16) | (0x9b7c >> 2),
509 	0x00000000,
510 	(0x0e00 << 16) | (0x8a14 >> 2),
511 	0x00000000,
512 	(0x0e00 << 16) | (0x8a18 >> 2),
513 	0x00000000,
514 	(0x0600 << 16) | (0x30a00 >> 2),
515 	0x00000000,
516 	(0x0e00 << 16) | (0x8bf0 >> 2),
517 	0x00000000,
518 	(0x0e00 << 16) | (0x8bcc >> 2),
519 	0x00000000,
520 	(0x0e00 << 16) | (0x8b24 >> 2),
521 	0x00000000,
522 	(0x0e00 << 16) | (0x30a04 >> 2),
523 	0x00000000,
524 	(0x0600 << 16) | (0x30a10 >> 2),
525 	0x00000000,
526 	(0x0600 << 16) | (0x30a14 >> 2),
527 	0x00000000,
528 	(0x0600 << 16) | (0x30a18 >> 2),
529 	0x00000000,
530 	(0x0600 << 16) | (0x30a2c >> 2),
531 	0x00000000,
532 	(0x0e00 << 16) | (0xc700 >> 2),
533 	0x00000000,
534 	(0x0e00 << 16) | (0xc704 >> 2),
535 	0x00000000,
536 	(0x0e00 << 16) | (0xc708 >> 2),
537 	0x00000000,
538 	(0x0e00 << 16) | (0xc768 >> 2),
539 	0x00000000,
540 	(0x0400 << 16) | (0xc770 >> 2),
541 	0x00000000,
542 	(0x0400 << 16) | (0xc774 >> 2),
543 	0x00000000,
544 	(0x0400 << 16) | (0xc778 >> 2),
545 	0x00000000,
546 	(0x0400 << 16) | (0xc77c >> 2),
547 	0x00000000,
548 	(0x0400 << 16) | (0xc780 >> 2),
549 	0x00000000,
550 	(0x0400 << 16) | (0xc784 >> 2),
551 	0x00000000,
552 	(0x0400 << 16) | (0xc788 >> 2),
553 	0x00000000,
554 	(0x0400 << 16) | (0xc78c >> 2),
555 	0x00000000,
556 	(0x0400 << 16) | (0xc798 >> 2),
557 	0x00000000,
558 	(0x0400 << 16) | (0xc79c >> 2),
559 	0x00000000,
560 	(0x0400 << 16) | (0xc7a0 >> 2),
561 	0x00000000,
562 	(0x0400 << 16) | (0xc7a4 >> 2),
563 	0x00000000,
564 	(0x0400 << 16) | (0xc7a8 >> 2),
565 	0x00000000,
566 	(0x0400 << 16) | (0xc7ac >> 2),
567 	0x00000000,
568 	(0x0400 << 16) | (0xc7b0 >> 2),
569 	0x00000000,
570 	(0x0400 << 16) | (0xc7b4 >> 2),
571 	0x00000000,
572 	(0x0e00 << 16) | (0x9100 >> 2),
573 	0x00000000,
574 	(0x0e00 << 16) | (0x3c010 >> 2),
575 	0x00000000,
576 	(0x0e00 << 16) | (0x92a8 >> 2),
577 	0x00000000,
578 	(0x0e00 << 16) | (0x92ac >> 2),
579 	0x00000000,
580 	(0x0e00 << 16) | (0x92b4 >> 2),
581 	0x00000000,
582 	(0x0e00 << 16) | (0x92b8 >> 2),
583 	0x00000000,
584 	(0x0e00 << 16) | (0x92bc >> 2),
585 	0x00000000,
586 	(0x0e00 << 16) | (0x92c0 >> 2),
587 	0x00000000,
588 	(0x0e00 << 16) | (0x92c4 >> 2),
589 	0x00000000,
590 	(0x0e00 << 16) | (0x92c8 >> 2),
591 	0x00000000,
592 	(0x0e00 << 16) | (0x92cc >> 2),
593 	0x00000000,
594 	(0x0e00 << 16) | (0x92d0 >> 2),
595 	0x00000000,
596 	(0x0e00 << 16) | (0x8c00 >> 2),
597 	0x00000000,
598 	(0x0e00 << 16) | (0x8c04 >> 2),
599 	0x00000000,
600 	(0x0e00 << 16) | (0x8c20 >> 2),
601 	0x00000000,
602 	(0x0e00 << 16) | (0x8c38 >> 2),
603 	0x00000000,
604 	(0x0e00 << 16) | (0x8c3c >> 2),
605 	0x00000000,
606 	(0x0e00 << 16) | (0xae00 >> 2),
607 	0x00000000,
608 	(0x0e00 << 16) | (0x9604 >> 2),
609 	0x00000000,
610 	(0x0e00 << 16) | (0xac08 >> 2),
611 	0x00000000,
612 	(0x0e00 << 16) | (0xac0c >> 2),
613 	0x00000000,
614 	(0x0e00 << 16) | (0xac10 >> 2),
615 	0x00000000,
616 	(0x0e00 << 16) | (0xac14 >> 2),
617 	0x00000000,
618 	(0x0e00 << 16) | (0xac58 >> 2),
619 	0x00000000,
620 	(0x0e00 << 16) | (0xac68 >> 2),
621 	0x00000000,
622 	(0x0e00 << 16) | (0xac6c >> 2),
623 	0x00000000,
624 	(0x0e00 << 16) | (0xac70 >> 2),
625 	0x00000000,
626 	(0x0e00 << 16) | (0xac74 >> 2),
627 	0x00000000,
628 	(0x0e00 << 16) | (0xac78 >> 2),
629 	0x00000000,
630 	(0x0e00 << 16) | (0xac7c >> 2),
631 	0x00000000,
632 	(0x0e00 << 16) | (0xac80 >> 2),
633 	0x00000000,
634 	(0x0e00 << 16) | (0xac84 >> 2),
635 	0x00000000,
636 	(0x0e00 << 16) | (0xac88 >> 2),
637 	0x00000000,
638 	(0x0e00 << 16) | (0xac8c >> 2),
639 	0x00000000,
640 	(0x0e00 << 16) | (0x970c >> 2),
641 	0x00000000,
642 	(0x0e00 << 16) | (0x9714 >> 2),
643 	0x00000000,
644 	(0x0e00 << 16) | (0x9718 >> 2),
645 	0x00000000,
646 	(0x0e00 << 16) | (0x971c >> 2),
647 	0x00000000,
648 	(0x0e00 << 16) | (0x31068 >> 2),
649 	0x00000000,
650 	(0x4e00 << 16) | (0x31068 >> 2),
651 	0x00000000,
652 	(0x5e00 << 16) | (0x31068 >> 2),
653 	0x00000000,
654 	(0x6e00 << 16) | (0x31068 >> 2),
655 	0x00000000,
656 	(0x7e00 << 16) | (0x31068 >> 2),
657 	0x00000000,
658 	(0x8e00 << 16) | (0x31068 >> 2),
659 	0x00000000,
660 	(0x9e00 << 16) | (0x31068 >> 2),
661 	0x00000000,
662 	(0xae00 << 16) | (0x31068 >> 2),
663 	0x00000000,
664 	(0xbe00 << 16) | (0x31068 >> 2),
665 	0x00000000,
666 	(0x0e00 << 16) | (0xcd10 >> 2),
667 	0x00000000,
668 	(0x0e00 << 16) | (0xcd14 >> 2),
669 	0x00000000,
670 	(0x0e00 << 16) | (0x88b0 >> 2),
671 	0x00000000,
672 	(0x0e00 << 16) | (0x88b4 >> 2),
673 	0x00000000,
674 	(0x0e00 << 16) | (0x88b8 >> 2),
675 	0x00000000,
676 	(0x0e00 << 16) | (0x88bc >> 2),
677 	0x00000000,
678 	(0x0400 << 16) | (0x89c0 >> 2),
679 	0x00000000,
680 	(0x0e00 << 16) | (0x88c4 >> 2),
681 	0x00000000,
682 	(0x0e00 << 16) | (0x88c8 >> 2),
683 	0x00000000,
684 	(0x0e00 << 16) | (0x88d0 >> 2),
685 	0x00000000,
686 	(0x0e00 << 16) | (0x88d4 >> 2),
687 	0x00000000,
688 	(0x0e00 << 16) | (0x88d8 >> 2),
689 	0x00000000,
690 	(0x0e00 << 16) | (0x8980 >> 2),
691 	0x00000000,
692 	(0x0e00 << 16) | (0x30938 >> 2),
693 	0x00000000,
694 	(0x0e00 << 16) | (0x3093c >> 2),
695 	0x00000000,
696 	(0x0e00 << 16) | (0x30940 >> 2),
697 	0x00000000,
698 	(0x0e00 << 16) | (0x89a0 >> 2),
699 	0x00000000,
700 	(0x0e00 << 16) | (0x30900 >> 2),
701 	0x00000000,
702 	(0x0e00 << 16) | (0x30904 >> 2),
703 	0x00000000,
704 	(0x0e00 << 16) | (0x89b4 >> 2),
705 	0x00000000,
706 	(0x0e00 << 16) | (0x3c210 >> 2),
707 	0x00000000,
708 	(0x0e00 << 16) | (0x3c214 >> 2),
709 	0x00000000,
710 	(0x0e00 << 16) | (0x3c218 >> 2),
711 	0x00000000,
712 	(0x0e00 << 16) | (0x8904 >> 2),
713 	0x00000000,
714 	0x5,
715 	(0x0e00 << 16) | (0x8c28 >> 2),
716 	(0x0e00 << 16) | (0x8c2c >> 2),
717 	(0x0e00 << 16) | (0x8c30 >> 2),
718 	(0x0e00 << 16) | (0x8c34 >> 2),
719 	(0x0e00 << 16) | (0x9600 >> 2),
720 };
721 
/*
 * RLC save/restore register list ("kalindi" variant).
 *
 * Layout as written below: most entries are a pair of words - a descriptor
 * that packs a 16-bit selector into bits 31:16 and a byte offset divided by
 * four into the low bits, followed by a zero word.  The bare words 0x3 and
 * 0x5 split the list into sections, and the five descriptors after 0x5 have
 * no trailing zero word.
 * NOTE(review): the consumer of this table is outside this chunk; the exact
 * meaning of the selector, the zero words and the 0x3/0x5 markers should be
 * confirmed there.
 */
722 static const u32 kalindi_rlc_save_restore_register_list[] =
723 {
724 	(0x0e00 << 16) | (0xc12c >> 2),
725 	0x00000000,
726 	(0x0e00 << 16) | (0xc140 >> 2),
727 	0x00000000,
728 	(0x0e00 << 16) | (0xc150 >> 2),
729 	0x00000000,
730 	(0x0e00 << 16) | (0xc15c >> 2),
731 	0x00000000,
732 	(0x0e00 << 16) | (0xc168 >> 2),
733 	0x00000000,
734 	(0x0e00 << 16) | (0xc170 >> 2),
735 	0x00000000,
736 	(0x0e00 << 16) | (0xc204 >> 2),
737 	0x00000000,
738 	(0x0e00 << 16) | (0xc2b4 >> 2),
739 	0x00000000,
740 	(0x0e00 << 16) | (0xc2b8 >> 2),
741 	0x00000000,
742 	(0x0e00 << 16) | (0xc2bc >> 2),
743 	0x00000000,
744 	(0x0e00 << 16) | (0xc2c0 >> 2),
745 	0x00000000,
746 	(0x0e00 << 16) | (0x8228 >> 2),
747 	0x00000000,
748 	(0x0e00 << 16) | (0x829c >> 2),
749 	0x00000000,
750 	(0x0e00 << 16) | (0x869c >> 2),
751 	0x00000000,
752 	(0x0600 << 16) | (0x98f4 >> 2),
753 	0x00000000,
754 	(0x0e00 << 16) | (0x98f8 >> 2),
755 	0x00000000,
756 	(0x0e00 << 16) | (0x9900 >> 2),
757 	0x00000000,
758 	(0x0e00 << 16) | (0xc260 >> 2),
759 	0x00000000,
760 	(0x0e00 << 16) | (0x90e8 >> 2),
761 	0x00000000,
762 	(0x0e00 << 16) | (0x3c000 >> 2),
763 	0x00000000,
764 	(0x0e00 << 16) | (0x3c00c >> 2),
765 	0x00000000,
766 	(0x0e00 << 16) | (0x8c1c >> 2),
767 	0x00000000,
768 	(0x0e00 << 16) | (0x9700 >> 2),
769 	0x00000000,
770 	(0x0e00 << 16) | (0xcd20 >> 2),
771 	0x00000000,
772 	(0x4e00 << 16) | (0xcd20 >> 2),
773 	0x00000000,
774 	(0x5e00 << 16) | (0xcd20 >> 2),
775 	0x00000000,
776 	(0x6e00 << 16) | (0xcd20 >> 2),
777 	0x00000000,
778 	(0x7e00 << 16) | (0xcd20 >> 2),
779 	0x00000000,
780 	(0x0e00 << 16) | (0x89bc >> 2),
781 	0x00000000,
782 	(0x0e00 << 16) | (0x8900 >> 2),
783 	0x00000000,
784 	0x3,
785 	(0x0e00 << 16) | (0xc130 >> 2),
786 	0x00000000,
787 	(0x0e00 << 16) | (0xc134 >> 2),
788 	0x00000000,
789 	(0x0e00 << 16) | (0xc1fc >> 2),
790 	0x00000000,
791 	(0x0e00 << 16) | (0xc208 >> 2),
792 	0x00000000,
793 	(0x0e00 << 16) | (0xc264 >> 2),
794 	0x00000000,
795 	(0x0e00 << 16) | (0xc268 >> 2),
796 	0x00000000,
797 	(0x0e00 << 16) | (0xc26c >> 2),
798 	0x00000000,
799 	(0x0e00 << 16) | (0xc270 >> 2),
800 	0x00000000,
801 	(0x0e00 << 16) | (0xc274 >> 2),
802 	0x00000000,
803 	(0x0e00 << 16) | (0xc28c >> 2),
804 	0x00000000,
805 	(0x0e00 << 16) | (0xc290 >> 2),
806 	0x00000000,
807 	(0x0e00 << 16) | (0xc294 >> 2),
808 	0x00000000,
809 	(0x0e00 << 16) | (0xc298 >> 2),
810 	0x00000000,
811 	(0x0e00 << 16) | (0xc2a0 >> 2),
812 	0x00000000,
813 	(0x0e00 << 16) | (0xc2a4 >> 2),
814 	0x00000000,
815 	(0x0e00 << 16) | (0xc2a8 >> 2),
816 	0x00000000,
817 	(0x0e00 << 16) | (0xc2ac >> 2),
818 	0x00000000,
819 	(0x0e00 << 16) | (0x301d0 >> 2),
820 	0x00000000,
821 	(0x0e00 << 16) | (0x30238 >> 2),
822 	0x00000000,
823 	(0x0e00 << 16) | (0x30250 >> 2),
824 	0x00000000,
825 	(0x0e00 << 16) | (0x30254 >> 2),
826 	0x00000000,
827 	(0x0e00 << 16) | (0x30258 >> 2),
828 	0x00000000,
829 	(0x0e00 << 16) | (0x3025c >> 2),
830 	0x00000000,
831 	(0x4e00 << 16) | (0xc900 >> 2),
832 	0x00000000,
833 	(0x5e00 << 16) | (0xc900 >> 2),
834 	0x00000000,
835 	(0x6e00 << 16) | (0xc900 >> 2),
836 	0x00000000,
837 	(0x7e00 << 16) | (0xc900 >> 2),
838 	0x00000000,
839 	(0x4e00 << 16) | (0xc904 >> 2),
840 	0x00000000,
841 	(0x5e00 << 16) | (0xc904 >> 2),
842 	0x00000000,
843 	(0x6e00 << 16) | (0xc904 >> 2),
844 	0x00000000,
845 	(0x7e00 << 16) | (0xc904 >> 2),
846 	0x00000000,
847 	(0x4e00 << 16) | (0xc908 >> 2),
848 	0x00000000,
849 	(0x5e00 << 16) | (0xc908 >> 2),
850 	0x00000000,
851 	(0x6e00 << 16) | (0xc908 >> 2),
852 	0x00000000,
853 	(0x7e00 << 16) | (0xc908 >> 2),
854 	0x00000000,
855 	(0x4e00 << 16) | (0xc90c >> 2),
856 	0x00000000,
857 	(0x5e00 << 16) | (0xc90c >> 2),
858 	0x00000000,
859 	(0x6e00 << 16) | (0xc90c >> 2),
860 	0x00000000,
861 	(0x7e00 << 16) | (0xc90c >> 2),
862 	0x00000000,
863 	(0x4e00 << 16) | (0xc910 >> 2),
864 	0x00000000,
865 	(0x5e00 << 16) | (0xc910 >> 2),
866 	0x00000000,
867 	(0x6e00 << 16) | (0xc910 >> 2),
868 	0x00000000,
869 	(0x7e00 << 16) | (0xc910 >> 2),
870 	0x00000000,
871 	(0x0e00 << 16) | (0xc99c >> 2),
872 	0x00000000,
873 	(0x0e00 << 16) | (0x9834 >> 2),
874 	0x00000000,
875 	(0x0000 << 16) | (0x30f00 >> 2),
876 	0x00000000,
877 	(0x0000 << 16) | (0x30f04 >> 2),
878 	0x00000000,
879 	(0x0000 << 16) | (0x30f08 >> 2),
880 	0x00000000,
881 	(0x0000 << 16) | (0x30f0c >> 2),
882 	0x00000000,
883 	(0x0600 << 16) | (0x9b7c >> 2),
884 	0x00000000,
885 	(0x0e00 << 16) | (0x8a14 >> 2),
886 	0x00000000,
887 	(0x0e00 << 16) | (0x8a18 >> 2),
888 	0x00000000,
889 	(0x0600 << 16) | (0x30a00 >> 2),
890 	0x00000000,
891 	(0x0e00 << 16) | (0x8bf0 >> 2),
892 	0x00000000,
893 	(0x0e00 << 16) | (0x8bcc >> 2),
894 	0x00000000,
895 	(0x0e00 << 16) | (0x8b24 >> 2),
896 	0x00000000,
897 	(0x0e00 << 16) | (0x30a04 >> 2),
898 	0x00000000,
899 	(0x0600 << 16) | (0x30a10 >> 2),
900 	0x00000000,
901 	(0x0600 << 16) | (0x30a14 >> 2),
902 	0x00000000,
903 	(0x0600 << 16) | (0x30a18 >> 2),
904 	0x00000000,
905 	(0x0600 << 16) | (0x30a2c >> 2),
906 	0x00000000,
907 	(0x0e00 << 16) | (0xc700 >> 2),
908 	0x00000000,
909 	(0x0e00 << 16) | (0xc704 >> 2),
910 	0x00000000,
911 	(0x0e00 << 16) | (0xc708 >> 2),
912 	0x00000000,
913 	(0x0e00 << 16) | (0xc768 >> 2),
914 	0x00000000,
915 	(0x0400 << 16) | (0xc770 >> 2),
916 	0x00000000,
917 	(0x0400 << 16) | (0xc774 >> 2),
918 	0x00000000,
919 	(0x0400 << 16) | (0xc798 >> 2),
920 	0x00000000,
921 	(0x0400 << 16) | (0xc79c >> 2),
922 	0x00000000,
923 	(0x0e00 << 16) | (0x9100 >> 2),
924 	0x00000000,
925 	(0x0e00 << 16) | (0x3c010 >> 2),
926 	0x00000000,
927 	(0x0e00 << 16) | (0x8c00 >> 2),
928 	0x00000000,
929 	(0x0e00 << 16) | (0x8c04 >> 2),
930 	0x00000000,
931 	(0x0e00 << 16) | (0x8c20 >> 2),
932 	0x00000000,
933 	(0x0e00 << 16) | (0x8c38 >> 2),
934 	0x00000000,
935 	(0x0e00 << 16) | (0x8c3c >> 2),
936 	0x00000000,
937 	(0x0e00 << 16) | (0xae00 >> 2),
938 	0x00000000,
939 	(0x0e00 << 16) | (0x9604 >> 2),
940 	0x00000000,
941 	(0x0e00 << 16) | (0xac08 >> 2),
942 	0x00000000,
943 	(0x0e00 << 16) | (0xac0c >> 2),
944 	0x00000000,
945 	(0x0e00 << 16) | (0xac10 >> 2),
946 	0x00000000,
947 	(0x0e00 << 16) | (0xac14 >> 2),
948 	0x00000000,
949 	(0x0e00 << 16) | (0xac58 >> 2),
950 	0x00000000,
951 	(0x0e00 << 16) | (0xac68 >> 2),
952 	0x00000000,
953 	(0x0e00 << 16) | (0xac6c >> 2),
954 	0x00000000,
955 	(0x0e00 << 16) | (0xac70 >> 2),
956 	0x00000000,
957 	(0x0e00 << 16) | (0xac74 >> 2),
958 	0x00000000,
959 	(0x0e00 << 16) | (0xac78 >> 2),
960 	0x00000000,
961 	(0x0e00 << 16) | (0xac7c >> 2),
962 	0x00000000,
963 	(0x0e00 << 16) | (0xac80 >> 2),
964 	0x00000000,
965 	(0x0e00 << 16) | (0xac84 >> 2),
966 	0x00000000,
967 	(0x0e00 << 16) | (0xac88 >> 2),
968 	0x00000000,
969 	(0x0e00 << 16) | (0xac8c >> 2),
970 	0x00000000,
971 	(0x0e00 << 16) | (0x970c >> 2),
972 	0x00000000,
973 	(0x0e00 << 16) | (0x9714 >> 2),
974 	0x00000000,
975 	(0x0e00 << 16) | (0x9718 >> 2),
976 	0x00000000,
977 	(0x0e00 << 16) | (0x971c >> 2),
978 	0x00000000,
979 	(0x0e00 << 16) | (0x31068 >> 2),
980 	0x00000000,
981 	(0x4e00 << 16) | (0x31068 >> 2),
982 	0x00000000,
983 	(0x5e00 << 16) | (0x31068 >> 2),
984 	0x00000000,
985 	(0x6e00 << 16) | (0x31068 >> 2),
986 	0x00000000,
987 	(0x7e00 << 16) | (0x31068 >> 2),
988 	0x00000000,
989 	(0x0e00 << 16) | (0xcd10 >> 2),
990 	0x00000000,
991 	(0x0e00 << 16) | (0xcd14 >> 2),
992 	0x00000000,
993 	(0x0e00 << 16) | (0x88b0 >> 2),
994 	0x00000000,
995 	(0x0e00 << 16) | (0x88b4 >> 2),
996 	0x00000000,
997 	(0x0e00 << 16) | (0x88b8 >> 2),
998 	0x00000000,
999 	(0x0e00 << 16) | (0x88bc >> 2),
1000 	0x00000000,
1001 	(0x0400 << 16) | (0x89c0 >> 2),
1002 	0x00000000,
1003 	(0x0e00 << 16) | (0x88c4 >> 2),
1004 	0x00000000,
1005 	(0x0e00 << 16) | (0x88c8 >> 2),
1006 	0x00000000,
1007 	(0x0e00 << 16) | (0x88d0 >> 2),
1008 	0x00000000,
1009 	(0x0e00 << 16) | (0x88d4 >> 2),
1010 	0x00000000,
1011 	(0x0e00 << 16) | (0x88d8 >> 2),
1012 	0x00000000,
1013 	(0x0e00 << 16) | (0x8980 >> 2),
1014 	0x00000000,
1015 	(0x0e00 << 16) | (0x30938 >> 2),
1016 	0x00000000,
1017 	(0x0e00 << 16) | (0x3093c >> 2),
1018 	0x00000000,
1019 	(0x0e00 << 16) | (0x30940 >> 2),
1020 	0x00000000,
1021 	(0x0e00 << 16) | (0x89a0 >> 2),
1022 	0x00000000,
1023 	(0x0e00 << 16) | (0x30900 >> 2),
1024 	0x00000000,
1025 	(0x0e00 << 16) | (0x30904 >> 2),
1026 	0x00000000,
1027 	(0x0e00 << 16) | (0x89b4 >> 2),
1028 	0x00000000,
1029 	(0x0e00 << 16) | (0x3e1fc >> 2),
1030 	0x00000000,
1031 	(0x0e00 << 16) | (0x3c210 >> 2),
1032 	0x00000000,
1033 	(0x0e00 << 16) | (0x3c214 >> 2),
1034 	0x00000000,
1035 	(0x0e00 << 16) | (0x3c218 >> 2),
1036 	0x00000000,
1037 	(0x0e00 << 16) | (0x8904 >> 2),
1038 	0x00000000,
1039 	0x5,
1040 	(0x0e00 << 16) | (0x8c28 >> 2),
1041 	(0x0e00 << 16) | (0x8c2c >> 2),
1042 	(0x0e00 << 16) | (0x8c30 >> 2),
1043 	(0x0e00 << 16) | (0x8c34 >> 2),
1044 	(0x0e00 << 16) | (0x9600 >> 2),
1045 };
1046 
/*
 * Bonaire "golden" SPM register settings: a single row of three words.
 * NOTE(review): presumably a (register offset, mask, value) triple applied
 * by a register-programming helper - confirm against the code that consumes
 * this table, which is outside this chunk.
 */
1047 static const u32 bonaire_golden_spm_registers[] =
1048 {
1049 	0x30800, 0xe0ffffff, 0xe0000000
1050 };
1051 
/*
 * Bonaire "golden" common register settings, written three words per row.
 * NOTE(review): presumably each row is a (register offset, mask, value)
 * triple applied by a register-programming helper - confirm against the
 * code that consumes this table, which is outside this chunk.
 */
1052 static const u32 bonaire_golden_common_registers[] =
1053 {
1054 	0xc770, 0xffffffff, 0x00000800,
1055 	0xc774, 0xffffffff, 0x00000800,
1056 	0xc798, 0xffffffff, 0x00007fbf,
1057 	0xc79c, 0xffffffff, 0x00007faf
1058 };
1059 
/*
 * Bonaire "golden" register settings, written three words per row.
 * NOTE(review): presumably each row is a (register offset, mask, value)
 * triple applied by a register-programming helper - confirm against the
 * code that consumes this table, which is outside this chunk.  Some rows
 * carry a third word with bits outside the second word (e.g. 0x9a10,
 * 0x350c, 0x9100); how those bits are treated depends on that helper.
 */
1060 static const u32 bonaire_golden_registers[] =
1061 {
1062 	0x3354, 0x00000333, 0x00000333,
1063 	0x3350, 0x000c0fc0, 0x00040200,
1064 	0x9a10, 0x00010000, 0x00058208,
1065 	0x3c000, 0xffff1fff, 0x00140000,
1066 	0x3c200, 0xfdfc0fff, 0x00000100,
1067 	0x3c234, 0x40000000, 0x40000200,
1068 	0x9830, 0xffffffff, 0x00000000,
1069 	0x9834, 0xf00fffff, 0x00000400,
1070 	0x9838, 0x0002021c, 0x00020200,
1071 	0xc78, 0x00000080, 0x00000000,
1072 	0x5bb0, 0x000000f0, 0x00000070,
1073 	0x5bc0, 0xf0311fff, 0x80300000,
1074 	0x98f8, 0x73773777, 0x12010001,
1075 	0x350c, 0x00810000, 0x408af000,
1076 	0x7030, 0x31000111, 0x00000011,
1077 	0x2f48, 0x73773777, 0x12010001,
1078 	0x220c, 0x00007fb6, 0x0021a1b1,
1079 	0x2210, 0x00007fb6, 0x002021b1,
1080 	0x2180, 0x00007fb6, 0x00002191,
1081 	0x2218, 0x00007fb6, 0x002121b1,
1082 	0x221c, 0x00007fb6, 0x002021b1,
1083 	0x21dc, 0x00007fb6, 0x00002191,
1084 	0x21e0, 0x00007fb6, 0x00002191,
1085 	0x3628, 0x0000003f, 0x0000000a,
1086 	0x362c, 0x0000003f, 0x0000000a,
1087 	0x2ae4, 0x00073ffe, 0x000022a2,
1088 	0x240c, 0x000007ff, 0x00000000,
1089 	0x8a14, 0xf000003f, 0x00000007,
1090 	0x8bf0, 0x00002001, 0x00000001,
1091 	0x8b24, 0xffffffff, 0x00ffffff,
1092 	0x30a04, 0x0000ff0f, 0x00000000,
1093 	0x28a4c, 0x07ffffff, 0x06000000,
1094 	0x4d8, 0x00000fff, 0x00000100,
1095 	0x3e78, 0x00000001, 0x00000002,
1096 	0x9100, 0x03000000, 0x0362c688,
1097 	0x8c00, 0x000000ff, 0x00000001,
1098 	0xe40, 0x00001fff, 0x00001fff,
1099 	0x9060, 0x0000007f, 0x00000020,
1100 	0x9508, 0x00010000, 0x00010000,
1101 	0xac14, 0x000003ff, 0x000000f3,
1102 	0xac0c, 0xffffffff, 0x00001032
1103 };
1104 
/*
 * Bonaire mgcg/cgcg init table: the first table programmed for
 * CHIP_BONAIRE by cik_init_golden_registers().
 * NOTE(review): presumably clock-gating defaults, laid out as
 * (register offset, mask, value) triples - confirm against
 * radeon_program_register_sequence().
 */
1105 static const u32 bonaire_mgcg_cgcg_init[] =
1106 {
1107 	0xc420, 0xffffffff, 0xfffffffc,
1108 	0x30800, 0xffffffff, 0xe0000000,
1109 	0x3c2a0, 0xffffffff, 0x00000100,
1110 	0x3c208, 0xffffffff, 0x00000100,
1111 	0x3c2c0, 0xffffffff, 0xc0000100,
1112 	0x3c2c8, 0xffffffff, 0xc0000100,
1113 	0x3c2c4, 0xffffffff, 0xc0000100,
1114 	0x55e4, 0xffffffff, 0x00600100,
1115 	0x3c280, 0xffffffff, 0x00000100,
1116 	0x3c214, 0xffffffff, 0x06000100,
1117 	0x3c220, 0xffffffff, 0x00000100,
1118 	0x3c218, 0xffffffff, 0x06000100,
1119 	0x3c204, 0xffffffff, 0x00000100,
1120 	0x3c2e0, 0xffffffff, 0x00000100,
1121 	0x3c224, 0xffffffff, 0x00000100,
1122 	0x3c200, 0xffffffff, 0x00000100,
1123 	0x3c230, 0xffffffff, 0x00000100,
1124 	0x3c234, 0xffffffff, 0x00000100,
1125 	0x3c250, 0xffffffff, 0x00000100,
1126 	0x3c254, 0xffffffff, 0x00000100,
1127 	0x3c258, 0xffffffff, 0x00000100,
1128 	0x3c25c, 0xffffffff, 0x00000100,
1129 	0x3c260, 0xffffffff, 0x00000100,
1130 	0x3c27c, 0xffffffff, 0x00000100,
1131 	0x3c278, 0xffffffff, 0x00000100,
1132 	0x3c210, 0xffffffff, 0x06000100,
1133 	0x3c290, 0xffffffff, 0x00000100,
1134 	0x3c274, 0xffffffff, 0x00000100,
1135 	0x3c2b4, 0xffffffff, 0x00000100,
1136 	0x3c2b0, 0xffffffff, 0x00000100,
1137 	0x3c270, 0xffffffff, 0x00000100,
1138 	0x30800, 0xffffffff, 0xe0000000,
1139 	0x3c020, 0xffffffff, 0x00010000,
1140 	0x3c024, 0xffffffff, 0x00030002,
1141 	0x3c028, 0xffffffff, 0x00040007,
1142 	0x3c02c, 0xffffffff, 0x00060005,
1143 	0x3c030, 0xffffffff, 0x00090008,
1144 	0x3c034, 0xffffffff, 0x00010000,
1145 	0x3c038, 0xffffffff, 0x00030002,
1146 	0x3c03c, 0xffffffff, 0x00040007,
1147 	0x3c040, 0xffffffff, 0x00060005,
1148 	0x3c044, 0xffffffff, 0x00090008,
1149 	0x3c048, 0xffffffff, 0x00010000,
1150 	0x3c04c, 0xffffffff, 0x00030002,
1151 	0x3c050, 0xffffffff, 0x00040007,
1152 	0x3c054, 0xffffffff, 0x00060005,
1153 	0x3c058, 0xffffffff, 0x00090008,
1154 	0x3c05c, 0xffffffff, 0x00010000,
1155 	0x3c060, 0xffffffff, 0x00030002,
1156 	0x3c064, 0xffffffff, 0x00040007,
1157 	0x3c068, 0xffffffff, 0x00060005,
1158 	0x3c06c, 0xffffffff, 0x00090008,
1159 	0x3c070, 0xffffffff, 0x00010000,
1160 	0x3c074, 0xffffffff, 0x00030002,
1161 	0x3c078, 0xffffffff, 0x00040007,
1162 	0x3c07c, 0xffffffff, 0x00060005,
1163 	0x3c080, 0xffffffff, 0x00090008,
1164 	0x3c084, 0xffffffff, 0x00010000,
1165 	0x3c088, 0xffffffff, 0x00030002,
1166 	0x3c08c, 0xffffffff, 0x00040007,
1167 	0x3c090, 0xffffffff, 0x00060005,
1168 	0x3c094, 0xffffffff, 0x00090008,
1169 	0x3c098, 0xffffffff, 0x00010000,
1170 	0x3c09c, 0xffffffff, 0x00030002,
1171 	0x3c0a0, 0xffffffff, 0x00040007,
1172 	0x3c0a4, 0xffffffff, 0x00060005,
1173 	0x3c0a8, 0xffffffff, 0x00090008,
1174 	0x3c000, 0xffffffff, 0x96e00200,
1175 	0x8708, 0xffffffff, 0x00900100,
1176 	0xc424, 0xffffffff, 0x0020003f,
1177 	0x38, 0xffffffff, 0x0140001c,
1178 	0x3c, 0x000f0000, 0x000f0000,
1179 	0x220, 0xffffffff, 0xC060000C,
1180 	0x224, 0xc0000fff, 0x00000100,
1181 	0xf90, 0xffffffff, 0x00000100,
1182 	0xf98, 0x00000101, 0x00000000,
1183 	0x20a8, 0xffffffff, 0x00000104,
1184 	0x55e4, 0xff000fff, 0x00000100,
1185 	0x30cc, 0xc0000fff, 0x00000104,
1186 	0xc1e4, 0x00000001, 0x00000001,
1187 	0xd00c, 0xff000ff0, 0x00000100,
1188 	0xd80c, 0xff000ff0, 0x00000100
1189 };
1190 
/*
 * Spectre "spm" golden settings: the last table programmed for
 * CHIP_KAVERI by cik_init_golden_registers().
 * NOTE(review): entries look like (register offset, mask, value)
 * triples - confirm against radeon_program_register_sequence().
 */
1191 static const u32 spectre_golden_spm_registers[] =
1192 {
1193 	0x30800, 0xe0ffffff, 0xe0000000
1194 };
1195 
/*
 * Spectre "common" golden settings: programmed third for CHIP_KAVERI
 * by cik_init_golden_registers().
 * NOTE(review): entries look like (register offset, mask, value)
 * triples - confirm against radeon_program_register_sequence().
 */
1196 static const u32 spectre_golden_common_registers[] =
1197 {
1198 	0xc770, 0xffffffff, 0x00000800,
1199 	0xc774, 0xffffffff, 0x00000800,
1200 	0xc798, 0xffffffff, 0x00007fbf,
1201 	0xc79c, 0xffffffff, 0x00007faf
1202 };
1203 
/*
 * Spectre golden register settings: programmed second for CHIP_KAVERI
 * by cik_init_golden_registers(), right after spectre_mgcg_cgcg_init.
 * NOTE(review): entries look like (register offset, mask, value)
 * triples - confirm against radeon_program_register_sequence().
 */
1204 static const u32 spectre_golden_registers[] =
1205 {
1206 	0x3c000, 0xffff1fff, 0x96940200,
1207 	0x3c00c, 0xffff0001, 0xff000000,
1208 	0x3c200, 0xfffc0fff, 0x00000100,
1209 	0x6ed8, 0x00010101, 0x00010000,
1210 	0x9834, 0xf00fffff, 0x00000400,
1211 	0x9838, 0xfffffffc, 0x00020200,
1212 	0x5bb0, 0x000000f0, 0x00000070,
1213 	0x5bc0, 0xf0311fff, 0x80300000,
1214 	0x98f8, 0x73773777, 0x12010001,
1215 	0x9b7c, 0x00ff0000, 0x00fc0000,
1216 	0x2f48, 0x73773777, 0x12010001,
1217 	0x8a14, 0xf000003f, 0x00000007,
1218 	0x8b24, 0xffffffff, 0x00ffffff,
1219 	0x28350, 0x3f3f3fff, 0x00000082,
1220 	0x28354, 0x0000003f, 0x00000000,
1221 	0x3e78, 0x00000001, 0x00000002,
1222 	0x913c, 0xffff03df, 0x00000004,
1223 	0xc768, 0x00000008, 0x00000008,
1224 	0x8c00, 0x000008ff, 0x00000800,
1225 	0x9508, 0x00010000, 0x00010000,
1226 	0xac0c, 0xffffffff, 0x54763210,
1227 	0x214f8, 0x01ff01ff, 0x00000002,
1228 	0x21498, 0x007ff800, 0x00200000,
1229 	0x2015c, 0xffffffff, 0x00000f40,
1230 	0x30934, 0xffffffff, 0x00000001
1231 };
1232 
/*
 * Spectre mgcg/cgcg init table: the first table programmed for
 * CHIP_KAVERI by cik_init_golden_registers().
 * NOTE(review): presumably clock-gating defaults, laid out as
 * (register offset, mask, value) triples - confirm against
 * radeon_program_register_sequence().
 */
1233 static const u32 spectre_mgcg_cgcg_init[] =
1234 {
1235 	0xc420, 0xffffffff, 0xfffffffc,
1236 	0x30800, 0xffffffff, 0xe0000000,
1237 	0x3c2a0, 0xffffffff, 0x00000100,
1238 	0x3c208, 0xffffffff, 0x00000100,
1239 	0x3c2c0, 0xffffffff, 0x00000100,
1240 	0x3c2c8, 0xffffffff, 0x00000100,
1241 	0x3c2c4, 0xffffffff, 0x00000100,
1242 	0x55e4, 0xffffffff, 0x00600100,
1243 	0x3c280, 0xffffffff, 0x00000100,
1244 	0x3c214, 0xffffffff, 0x06000100,
1245 	0x3c220, 0xffffffff, 0x00000100,
1246 	0x3c218, 0xffffffff, 0x06000100,
1247 	0x3c204, 0xffffffff, 0x00000100,
1248 	0x3c2e0, 0xffffffff, 0x00000100,
1249 	0x3c224, 0xffffffff, 0x00000100,
1250 	0x3c200, 0xffffffff, 0x00000100,
1251 	0x3c230, 0xffffffff, 0x00000100,
1252 	0x3c234, 0xffffffff, 0x00000100,
1253 	0x3c250, 0xffffffff, 0x00000100,
1254 	0x3c254, 0xffffffff, 0x00000100,
1255 	0x3c258, 0xffffffff, 0x00000100,
1256 	0x3c25c, 0xffffffff, 0x00000100,
1257 	0x3c260, 0xffffffff, 0x00000100,
1258 	0x3c27c, 0xffffffff, 0x00000100,
1259 	0x3c278, 0xffffffff, 0x00000100,
1260 	0x3c210, 0xffffffff, 0x06000100,
1261 	0x3c290, 0xffffffff, 0x00000100,
1262 	0x3c274, 0xffffffff, 0x00000100,
1263 	0x3c2b4, 0xffffffff, 0x00000100,
1264 	0x3c2b0, 0xffffffff, 0x00000100,
1265 	0x3c270, 0xffffffff, 0x00000100,
1266 	0x30800, 0xffffffff, 0xe0000000,
1267 	0x3c020, 0xffffffff, 0x00010000,
1268 	0x3c024, 0xffffffff, 0x00030002,
1269 	0x3c028, 0xffffffff, 0x00040007,
1270 	0x3c02c, 0xffffffff, 0x00060005,
1271 	0x3c030, 0xffffffff, 0x00090008,
1272 	0x3c034, 0xffffffff, 0x00010000,
1273 	0x3c038, 0xffffffff, 0x00030002,
1274 	0x3c03c, 0xffffffff, 0x00040007,
1275 	0x3c040, 0xffffffff, 0x00060005,
1276 	0x3c044, 0xffffffff, 0x00090008,
1277 	0x3c048, 0xffffffff, 0x00010000,
1278 	0x3c04c, 0xffffffff, 0x00030002,
1279 	0x3c050, 0xffffffff, 0x00040007,
1280 	0x3c054, 0xffffffff, 0x00060005,
1281 	0x3c058, 0xffffffff, 0x00090008,
1282 	0x3c05c, 0xffffffff, 0x00010000,
1283 	0x3c060, 0xffffffff, 0x00030002,
1284 	0x3c064, 0xffffffff, 0x00040007,
1285 	0x3c068, 0xffffffff, 0x00060005,
1286 	0x3c06c, 0xffffffff, 0x00090008,
1287 	0x3c070, 0xffffffff, 0x00010000,
1288 	0x3c074, 0xffffffff, 0x00030002,
1289 	0x3c078, 0xffffffff, 0x00040007,
1290 	0x3c07c, 0xffffffff, 0x00060005,
1291 	0x3c080, 0xffffffff, 0x00090008,
1292 	0x3c084, 0xffffffff, 0x00010000,
1293 	0x3c088, 0xffffffff, 0x00030002,
1294 	0x3c08c, 0xffffffff, 0x00040007,
1295 	0x3c090, 0xffffffff, 0x00060005,
1296 	0x3c094, 0xffffffff, 0x00090008,
1297 	0x3c098, 0xffffffff, 0x00010000,
1298 	0x3c09c, 0xffffffff, 0x00030002,
1299 	0x3c0a0, 0xffffffff, 0x00040007,
1300 	0x3c0a4, 0xffffffff, 0x00060005,
1301 	0x3c0a8, 0xffffffff, 0x00090008,
1302 	0x3c0ac, 0xffffffff, 0x00010000,
1303 	0x3c0b0, 0xffffffff, 0x00030002,
1304 	0x3c0b4, 0xffffffff, 0x00040007,
1305 	0x3c0b8, 0xffffffff, 0x00060005,
1306 	0x3c0bc, 0xffffffff, 0x00090008,
1307 	0x3c000, 0xffffffff, 0x96e00200,
1308 	0x8708, 0xffffffff, 0x00900100,
1309 	0xc424, 0xffffffff, 0x0020003f,
1310 	0x38, 0xffffffff, 0x0140001c,
1311 	0x3c, 0x000f0000, 0x000f0000,
1312 	0x220, 0xffffffff, 0xC060000C,
1313 	0x224, 0xc0000fff, 0x00000100,
1314 	0xf90, 0xffffffff, 0x00000100,
1315 	0xf98, 0x00000101, 0x00000000,
1316 	0x20a8, 0xffffffff, 0x00000104,
1317 	0x55e4, 0xff000fff, 0x00000100,
1318 	0x30cc, 0xc0000fff, 0x00000104,
1319 	0xc1e4, 0x00000001, 0x00000001,
1320 	0xd00c, 0xff000ff0, 0x00000100,
1321 	0xd80c, 0xff000ff0, 0x00000100
1322 };
1323 
/*
 * Kalindi "spm" golden settings: the last table programmed for both
 * CHIP_KABINI and CHIP_MULLINS by cik_init_golden_registers().
 * NOTE(review): entries look like (register offset, mask, value)
 * triples - confirm against radeon_program_register_sequence().
 */
1324 static const u32 kalindi_golden_spm_registers[] =
1325 {
1326 	0x30800, 0xe0ffffff, 0xe0000000
1327 };
1328 
/*
 * Kalindi "common" golden settings: programmed third for both
 * CHIP_KABINI and CHIP_MULLINS by cik_init_golden_registers().
 * NOTE(review): entries look like (register offset, mask, value)
 * triples - confirm against radeon_program_register_sequence().
 */
1329 static const u32 kalindi_golden_common_registers[] =
1330 {
1331 	0xc770, 0xffffffff, 0x00000800,
1332 	0xc774, 0xffffffff, 0x00000800,
1333 	0xc798, 0xffffffff, 0x00007fbf,
1334 	0xc79c, 0xffffffff, 0x00007faf
1335 };
1336 
/*
 * Kalindi golden register settings: programmed second for CHIP_KABINI
 * by cik_init_golden_registers() (CHIP_MULLINS uses
 * godavari_golden_registers in this slot instead).
 * NOTE(review): entries look like (register offset, mask, value)
 * triples - confirm against radeon_program_register_sequence().
 */
1337 static const u32 kalindi_golden_registers[] =
1338 {
1339 	0x3c000, 0xffffdfff, 0x6e944040,
1340 	0x55e4, 0xff607fff, 0xfc000100,
1341 	0x3c220, 0xff000fff, 0x00000100,
1342 	0x3c224, 0xff000fff, 0x00000100,
1343 	0x3c200, 0xfffc0fff, 0x00000100,
1344 	0x6ed8, 0x00010101, 0x00010000,
1345 	0x9830, 0xffffffff, 0x00000000,
1346 	0x9834, 0xf00fffff, 0x00000400,
1347 	0x5bb0, 0x000000f0, 0x00000070,
1348 	0x5bc0, 0xf0311fff, 0x80300000,
1349 	0x98f8, 0x73773777, 0x12010001,
1350 	0x98fc, 0xffffffff, 0x00000010,
1351 	0x9b7c, 0x00ff0000, 0x00fc0000,
1352 	0x8030, 0x00001f0f, 0x0000100a,
1353 	0x2f48, 0x73773777, 0x12010001,
1354 	0x2408, 0x000fffff, 0x000c007f,
1355 	0x8a14, 0xf000003f, 0x00000007,
1356 	0x8b24, 0x3fff3fff, 0x00ffcfff,
1357 	0x30a04, 0x0000ff0f, 0x00000000,
1358 	0x28a4c, 0x07ffffff, 0x06000000,
1359 	0x4d8, 0x00000fff, 0x00000100,
1360 	0x3e78, 0x00000001, 0x00000002,
1361 	0xc768, 0x00000008, 0x00000008,
1362 	0x8c00, 0x000000ff, 0x00000003,
1363 	0x214f8, 0x01ff01ff, 0x00000002,
1364 	0x21498, 0x007ff800, 0x00200000,
1365 	0x2015c, 0xffffffff, 0x00000f40,
1366 	0x88c4, 0x001f3ae3, 0x00000082,
1367 	0x88d4, 0x0000001f, 0x00000010,
1368 	0x30934, 0xffffffff, 0x00000000
1369 };
1370 
/*
 * Kalindi mgcg/cgcg init table: the first table programmed for both
 * CHIP_KABINI and CHIP_MULLINS by cik_init_golden_registers().
 * NOTE(review): presumably clock-gating defaults, laid out as
 * (register offset, mask, value) triples - confirm against
 * radeon_program_register_sequence().
 */
1371 static const u32 kalindi_mgcg_cgcg_init[] =
1372 {
1373 	0xc420, 0xffffffff, 0xfffffffc,
1374 	0x30800, 0xffffffff, 0xe0000000,
1375 	0x3c2a0, 0xffffffff, 0x00000100,
1376 	0x3c208, 0xffffffff, 0x00000100,
1377 	0x3c2c0, 0xffffffff, 0x00000100,
1378 	0x3c2c8, 0xffffffff, 0x00000100,
1379 	0x3c2c4, 0xffffffff, 0x00000100,
1380 	0x55e4, 0xffffffff, 0x00600100,
1381 	0x3c280, 0xffffffff, 0x00000100,
1382 	0x3c214, 0xffffffff, 0x06000100,
1383 	0x3c220, 0xffffffff, 0x00000100,
1384 	0x3c218, 0xffffffff, 0x06000100,
1385 	0x3c204, 0xffffffff, 0x00000100,
1386 	0x3c2e0, 0xffffffff, 0x00000100,
1387 	0x3c224, 0xffffffff, 0x00000100,
1388 	0x3c200, 0xffffffff, 0x00000100,
1389 	0x3c230, 0xffffffff, 0x00000100,
1390 	0x3c234, 0xffffffff, 0x00000100,
1391 	0x3c250, 0xffffffff, 0x00000100,
1392 	0x3c254, 0xffffffff, 0x00000100,
1393 	0x3c258, 0xffffffff, 0x00000100,
1394 	0x3c25c, 0xffffffff, 0x00000100,
1395 	0x3c260, 0xffffffff, 0x00000100,
1396 	0x3c27c, 0xffffffff, 0x00000100,
1397 	0x3c278, 0xffffffff, 0x00000100,
1398 	0x3c210, 0xffffffff, 0x06000100,
1399 	0x3c290, 0xffffffff, 0x00000100,
1400 	0x3c274, 0xffffffff, 0x00000100,
1401 	0x3c2b4, 0xffffffff, 0x00000100,
1402 	0x3c2b0, 0xffffffff, 0x00000100,
1403 	0x3c270, 0xffffffff, 0x00000100,
1404 	0x30800, 0xffffffff, 0xe0000000,
1405 	0x3c020, 0xffffffff, 0x00010000,
1406 	0x3c024, 0xffffffff, 0x00030002,
1407 	0x3c028, 0xffffffff, 0x00040007,
1408 	0x3c02c, 0xffffffff, 0x00060005,
1409 	0x3c030, 0xffffffff, 0x00090008,
1410 	0x3c034, 0xffffffff, 0x00010000,
1411 	0x3c038, 0xffffffff, 0x00030002,
1412 	0x3c03c, 0xffffffff, 0x00040007,
1413 	0x3c040, 0xffffffff, 0x00060005,
1414 	0x3c044, 0xffffffff, 0x00090008,
1415 	0x3c000, 0xffffffff, 0x96e00200,
1416 	0x8708, 0xffffffff, 0x00900100,
1417 	0xc424, 0xffffffff, 0x0020003f,
1418 	0x38, 0xffffffff, 0x0140001c,
1419 	0x3c, 0x000f0000, 0x000f0000,
1420 	0x220, 0xffffffff, 0xC060000C,
1421 	0x224, 0xc0000fff, 0x00000100,
1422 	0x20a8, 0xffffffff, 0x00000104,
1423 	0x55e4, 0xff000fff, 0x00000100,
1424 	0x30cc, 0xc0000fff, 0x00000104,
1425 	0xc1e4, 0x00000001, 0x00000001,
1426 	0xd00c, 0xff000ff0, 0x00000100,
1427 	0xd80c, 0xff000ff0, 0x00000100
1428 };
1429 
/*
 * Hawaii "spm" golden settings: the last table programmed for
 * CHIP_HAWAII by cik_init_golden_registers().
 * NOTE(review): entries look like (register offset, mask, value)
 * triples - confirm against radeon_program_register_sequence().
 */
1430 static const u32 hawaii_golden_spm_registers[] =
1431 {
1432 	0x30800, 0xe0ffffff, 0xe0000000
1433 };
1434 
/*
 * Hawaii "common" golden settings: programmed third for CHIP_HAWAII
 * by cik_init_golden_registers().
 * NOTE(review): entries look like (register offset, mask, value)
 * triples - confirm against radeon_program_register_sequence().
 */
1435 static const u32 hawaii_golden_common_registers[] =
1436 {
1437 	0x30800, 0xffffffff, 0xe0000000,
1438 	0x28350, 0xffffffff, 0x3a00161a,
1439 	0x28354, 0xffffffff, 0x0000002e,
1440 	0x9a10, 0xffffffff, 0x00018208,
1441 	0x98f8, 0xffffffff, 0x12011003
1442 };
1443 
/*
 * Hawaii golden register settings: programmed second for CHIP_HAWAII
 * by cik_init_golden_registers(), right after hawaii_mgcg_cgcg_init.
 * NOTE(review): entries look like (register offset, mask, value)
 * triples - confirm against radeon_program_register_sequence().
 */
1444 static const u32 hawaii_golden_registers[] =
1445 {
1446 	0x3354, 0x00000333, 0x00000333,
1447 	0x9a10, 0x00010000, 0x00058208,
1448 	0x9830, 0xffffffff, 0x00000000,
1449 	0x9834, 0xf00fffff, 0x00000400,
1450 	0x9838, 0x0002021c, 0x00020200,
1451 	0xc78, 0x00000080, 0x00000000,
1452 	0x5bb0, 0x000000f0, 0x00000070,
1453 	0x5bc0, 0xf0311fff, 0x80300000,
1454 	0x350c, 0x00810000, 0x408af000,
1455 	0x7030, 0x31000111, 0x00000011,
1456 	0x2f48, 0x73773777, 0x12010001,
1457 	0x2120, 0x0000007f, 0x0000001b,
1458 	0x21dc, 0x00007fb6, 0x00002191,
1459 	0x3628, 0x0000003f, 0x0000000a,
1460 	0x362c, 0x0000003f, 0x0000000a,
1461 	0x2ae4, 0x00073ffe, 0x000022a2,
1462 	0x240c, 0x000007ff, 0x00000000,
1463 	0x8bf0, 0x00002001, 0x00000001,
1464 	0x8b24, 0xffffffff, 0x00ffffff,
1465 	0x30a04, 0x0000ff0f, 0x00000000,
1466 	0x28a4c, 0x07ffffff, 0x06000000,
1467 	0x3e78, 0x00000001, 0x00000002,
1468 	0xc768, 0x00000008, 0x00000008,
1469 	0xc770, 0x00000f00, 0x00000800,
1470 	0xc774, 0x00000f00, 0x00000800,
1471 	0xc798, 0x00ffffff, 0x00ff7fbf,
1472 	0xc79c, 0x00ffffff, 0x00ff7faf,
1473 	0x8c00, 0x000000ff, 0x00000800,
1474 	0xe40, 0x00001fff, 0x00001fff,
1475 	0x9060, 0x0000007f, 0x00000020,
1476 	0x9508, 0x00010000, 0x00010000,
1477 	0xae00, 0x00100000, 0x000ff07c,
1478 	0xac14, 0x000003ff, 0x0000000f,
1479 	0xac10, 0xffffffff, 0x7564fdec,
1480 	0xac0c, 0xffffffff, 0x3120b9a8,
1481 	0xac08, 0x20000000, 0x0f9c0000
1482 };
1483 
/*
 * Hawaii mgcg/cgcg init table: the first table programmed for
 * CHIP_HAWAII by cik_init_golden_registers().
 * NOTE(review): presumably clock-gating defaults, laid out as
 * (register offset, mask, value) triples - confirm against
 * radeon_program_register_sequence().
 */
1484 static const u32 hawaii_mgcg_cgcg_init[] =
1485 {
1486 	0xc420, 0xffffffff, 0xfffffffd,
1487 	0x30800, 0xffffffff, 0xe0000000,
1488 	0x3c2a0, 0xffffffff, 0x00000100,
1489 	0x3c208, 0xffffffff, 0x00000100,
1490 	0x3c2c0, 0xffffffff, 0x00000100,
1491 	0x3c2c8, 0xffffffff, 0x00000100,
1492 	0x3c2c4, 0xffffffff, 0x00000100,
1493 	0x55e4, 0xffffffff, 0x00200100,
1494 	0x3c280, 0xffffffff, 0x00000100,
1495 	0x3c214, 0xffffffff, 0x06000100,
1496 	0x3c220, 0xffffffff, 0x00000100,
1497 	0x3c218, 0xffffffff, 0x06000100,
1498 	0x3c204, 0xffffffff, 0x00000100,
1499 	0x3c2e0, 0xffffffff, 0x00000100,
1500 	0x3c224, 0xffffffff, 0x00000100,
1501 	0x3c200, 0xffffffff, 0x00000100,
1502 	0x3c230, 0xffffffff, 0x00000100,
1503 	0x3c234, 0xffffffff, 0x00000100,
1504 	0x3c250, 0xffffffff, 0x00000100,
1505 	0x3c254, 0xffffffff, 0x00000100,
1506 	0x3c258, 0xffffffff, 0x00000100,
1507 	0x3c25c, 0xffffffff, 0x00000100,
1508 	0x3c260, 0xffffffff, 0x00000100,
1509 	0x3c27c, 0xffffffff, 0x00000100,
1510 	0x3c278, 0xffffffff, 0x00000100,
1511 	0x3c210, 0xffffffff, 0x06000100,
1512 	0x3c290, 0xffffffff, 0x00000100,
1513 	0x3c274, 0xffffffff, 0x00000100,
1514 	0x3c2b4, 0xffffffff, 0x00000100,
1515 	0x3c2b0, 0xffffffff, 0x00000100,
1516 	0x3c270, 0xffffffff, 0x00000100,
1517 	0x30800, 0xffffffff, 0xe0000000,
1518 	0x3c020, 0xffffffff, 0x00010000,
1519 	0x3c024, 0xffffffff, 0x00030002,
1520 	0x3c028, 0xffffffff, 0x00040007,
1521 	0x3c02c, 0xffffffff, 0x00060005,
1522 	0x3c030, 0xffffffff, 0x00090008,
1523 	0x3c034, 0xffffffff, 0x00010000,
1524 	0x3c038, 0xffffffff, 0x00030002,
1525 	0x3c03c, 0xffffffff, 0x00040007,
1526 	0x3c040, 0xffffffff, 0x00060005,
1527 	0x3c044, 0xffffffff, 0x00090008,
1528 	0x3c048, 0xffffffff, 0x00010000,
1529 	0x3c04c, 0xffffffff, 0x00030002,
1530 	0x3c050, 0xffffffff, 0x00040007,
1531 	0x3c054, 0xffffffff, 0x00060005,
1532 	0x3c058, 0xffffffff, 0x00090008,
1533 	0x3c05c, 0xffffffff, 0x00010000,
1534 	0x3c060, 0xffffffff, 0x00030002,
1535 	0x3c064, 0xffffffff, 0x00040007,
1536 	0x3c068, 0xffffffff, 0x00060005,
1537 	0x3c06c, 0xffffffff, 0x00090008,
1538 	0x3c070, 0xffffffff, 0x00010000,
1539 	0x3c074, 0xffffffff, 0x00030002,
1540 	0x3c078, 0xffffffff, 0x00040007,
1541 	0x3c07c, 0xffffffff, 0x00060005,
1542 	0x3c080, 0xffffffff, 0x00090008,
1543 	0x3c084, 0xffffffff, 0x00010000,
1544 	0x3c088, 0xffffffff, 0x00030002,
1545 	0x3c08c, 0xffffffff, 0x00040007,
1546 	0x3c090, 0xffffffff, 0x00060005,
1547 	0x3c094, 0xffffffff, 0x00090008,
1548 	0x3c098, 0xffffffff, 0x00010000,
1549 	0x3c09c, 0xffffffff, 0x00030002,
1550 	0x3c0a0, 0xffffffff, 0x00040007,
1551 	0x3c0a4, 0xffffffff, 0x00060005,
1552 	0x3c0a8, 0xffffffff, 0x00090008,
1553 	0x3c0ac, 0xffffffff, 0x00010000,
1554 	0x3c0b0, 0xffffffff, 0x00030002,
1555 	0x3c0b4, 0xffffffff, 0x00040007,
1556 	0x3c0b8, 0xffffffff, 0x00060005,
1557 	0x3c0bc, 0xffffffff, 0x00090008,
1558 	0x3c0c0, 0xffffffff, 0x00010000,
1559 	0x3c0c4, 0xffffffff, 0x00030002,
1560 	0x3c0c8, 0xffffffff, 0x00040007,
1561 	0x3c0cc, 0xffffffff, 0x00060005,
1562 	0x3c0d0, 0xffffffff, 0x00090008,
1563 	0x3c0d4, 0xffffffff, 0x00010000,
1564 	0x3c0d8, 0xffffffff, 0x00030002,
1565 	0x3c0dc, 0xffffffff, 0x00040007,
1566 	0x3c0e0, 0xffffffff, 0x00060005,
1567 	0x3c0e4, 0xffffffff, 0x00090008,
1568 	0x3c0e8, 0xffffffff, 0x00010000,
1569 	0x3c0ec, 0xffffffff, 0x00030002,
1570 	0x3c0f0, 0xffffffff, 0x00040007,
1571 	0x3c0f4, 0xffffffff, 0x00060005,
1572 	0x3c0f8, 0xffffffff, 0x00090008,
1573 	0xc318, 0xffffffff, 0x00020200,
1574 	0x3350, 0xffffffff, 0x00000200,
1575 	0x15c0, 0xffffffff, 0x00000400,
1576 	0x55e8, 0xffffffff, 0x00000000,
1577 	0x2f50, 0xffffffff, 0x00000902,
1578 	0x3c000, 0xffffffff, 0x96940200,
1579 	0x8708, 0xffffffff, 0x00900100,
1580 	0xc424, 0xffffffff, 0x0020003f,
1581 	0x38, 0xffffffff, 0x0140001c,
1582 	0x3c, 0x000f0000, 0x000f0000,
1583 	0x220, 0xffffffff, 0xc060000c,
1584 	0x224, 0xc0000fff, 0x00000100,
1585 	0xf90, 0xffffffff, 0x00000100,
1586 	0xf98, 0x00000101, 0x00000000,
1587 	0x20a8, 0xffffffff, 0x00000104,
1588 	0x55e4, 0xff000fff, 0x00000100,
1589 	0x30cc, 0xc0000fff, 0x00000104,
1590 	0xc1e4, 0x00000001, 0x00000001,
1591 	0xd00c, 0xff000ff0, 0x00000100,
1592 	0xd80c, 0xff000ff0, 0x00000100
1593 };
1594 
1595 static const u32 godavari_golden_registers[] =
1596 {
1597 	0x55e4, 0xff607fff, 0xfc000100,
1598 	0x6ed8, 0x00010101, 0x00010000,
1599 	0x9830, 0xffffffff, 0x00000000,
1600 	0x98302, 0xf00fffff, 0x00000400,
1601 	0x6130, 0xffffffff, 0x00010000,
1602 	0x5bb0, 0x000000f0, 0x00000070,
1603 	0x5bc0, 0xf0311fff, 0x80300000,
1604 	0x98f8, 0x73773777, 0x12010001,
1605 	0x98fc, 0xffffffff, 0x00000010,
1606 	0x8030, 0x00001f0f, 0x0000100a,
1607 	0x2f48, 0x73773777, 0x12010001,
1608 	0x2408, 0x000fffff, 0x000c007f,
1609 	0x8a14, 0xf000003f, 0x00000007,
1610 	0x8b24, 0xffffffff, 0x00ff0fff,
1611 	0x30a04, 0x0000ff0f, 0x00000000,
1612 	0x28a4c, 0x07ffffff, 0x06000000,
1613 	0x4d8, 0x00000fff, 0x00000100,
1614 	0xd014, 0x00010000, 0x00810001,
1615 	0xd814, 0x00010000, 0x00810001,
1616 	0x3e78, 0x00000001, 0x00000002,
1617 	0xc768, 0x00000008, 0x00000008,
1618 	0xc770, 0x00000f00, 0x00000800,
1619 	0xc774, 0x00000f00, 0x00000800,
1620 	0xc798, 0x00ffffff, 0x00ff7fbf,
1621 	0xc79c, 0x00ffffff, 0x00ff7faf,
1622 	0x8c00, 0x000000ff, 0x00000001,
1623 	0x214f8, 0x01ff01ff, 0x00000002,
1624 	0x21498, 0x007ff800, 0x00200000,
1625 	0x2015c, 0xffffffff, 0x00000f40,
1626 	0x88c4, 0x001f3ae3, 0x00000082,
1627 	0x88d4, 0x0000001f, 0x00000010,
1628 	0x30934, 0xffffffff, 0x00000000
1629 };
1630 
1631 
1632 static void cik_init_golden_registers(struct radeon_device *rdev)
1633 {
1634 	switch (rdev->family) {
1635 	case CHIP_BONAIRE:
1636 		radeon_program_register_sequence(rdev,
1637 						 bonaire_mgcg_cgcg_init,
1638 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1639 		radeon_program_register_sequence(rdev,
1640 						 bonaire_golden_registers,
1641 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1642 		radeon_program_register_sequence(rdev,
1643 						 bonaire_golden_common_registers,
1644 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1645 		radeon_program_register_sequence(rdev,
1646 						 bonaire_golden_spm_registers,
1647 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1648 		break;
1649 	case CHIP_KABINI:
1650 		radeon_program_register_sequence(rdev,
1651 						 kalindi_mgcg_cgcg_init,
1652 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1653 		radeon_program_register_sequence(rdev,
1654 						 kalindi_golden_registers,
1655 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1656 		radeon_program_register_sequence(rdev,
1657 						 kalindi_golden_common_registers,
1658 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1659 		radeon_program_register_sequence(rdev,
1660 						 kalindi_golden_spm_registers,
1661 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1662 		break;
1663 	case CHIP_MULLINS:
1664 		radeon_program_register_sequence(rdev,
1665 						 kalindi_mgcg_cgcg_init,
1666 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1667 		radeon_program_register_sequence(rdev,
1668 						 godavari_golden_registers,
1669 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1670 		radeon_program_register_sequence(rdev,
1671 						 kalindi_golden_common_registers,
1672 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1673 		radeon_program_register_sequence(rdev,
1674 						 kalindi_golden_spm_registers,
1675 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1676 		break;
1677 	case CHIP_KAVERI:
1678 		radeon_program_register_sequence(rdev,
1679 						 spectre_mgcg_cgcg_init,
1680 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1681 		radeon_program_register_sequence(rdev,
1682 						 spectre_golden_registers,
1683 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1684 		radeon_program_register_sequence(rdev,
1685 						 spectre_golden_common_registers,
1686 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1687 		radeon_program_register_sequence(rdev,
1688 						 spectre_golden_spm_registers,
1689 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1690 		break;
1691 	case CHIP_HAWAII:
1692 		radeon_program_register_sequence(rdev,
1693 						 hawaii_mgcg_cgcg_init,
1694 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1695 		radeon_program_register_sequence(rdev,
1696 						 hawaii_golden_registers,
1697 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1698 		radeon_program_register_sequence(rdev,
1699 						 hawaii_golden_common_registers,
1700 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1701 		radeon_program_register_sequence(rdev,
1702 						 hawaii_golden_spm_registers,
1703 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1704 		break;
1705 	default:
1706 		break;
1707 	}
1708 }
1709 
1710 /**
1711  * cik_get_xclk - get the xclk
1712  *
1713  * @rdev: radeon_device pointer
1714  *
1715  * Returns the reference clock used by the gfx engine
1716  * (CIK).
1717  */
1718 u32 cik_get_xclk(struct radeon_device *rdev)
1719 {
1720 	u32 reference_clock = rdev->clock.spll.reference_freq;
1721 
1722 	if (rdev->flags & RADEON_IS_IGP) {
1723 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1724 			return reference_clock / 2;
1725 	} else {
1726 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1727 			return reference_clock / 4;
1728 	}
1729 	return reference_clock;
1730 }
1731 
1732 /**
1733  * cik_mm_rdoorbell - read a doorbell dword
1734  *
1735  * @rdev: radeon_device pointer
1736  * @index: doorbell index
1737  *
1738  * Returns the value in the doorbell aperture at the
1739  * requested doorbell index (CIK).
1740  */
1741 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1742 {
1743 	if (index < rdev->doorbell.num_doorbells) {
1744 		return readl(rdev->doorbell.ptr + index);
1745 	} else {
1746 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1747 		return 0;
1748 	}
1749 }
1750 
1751 /**
1752  * cik_mm_wdoorbell - write a doorbell dword
1753  *
1754  * @rdev: radeon_device pointer
1755  * @index: doorbell index
1756  * @v: value to write
1757  *
1758  * Writes @v to the doorbell aperture at the
1759  * requested doorbell index (CIK).
1760  */
1761 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1762 {
1763 	if (index < rdev->doorbell.num_doorbells) {
1764 		writel(v, rdev->doorbell.ptr + index);
1765 	} else {
1766 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1767 	}
1768 }
1769 
/* number of {index, data} rows in bonaire_io_mc_regs */
1770 #define BONAIRE_IO_MC_REGS_SIZE 36
1771 
/*
 * MC io debug table used by ci_mc_load_microcode() on CHIP_BONAIRE when
 * the legacy (non new_fw) firmware image is loaded.  For each row the
 * first word is written to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA.
 */
1772 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1773 {
1774 	{0x00000070, 0x04400000},
1775 	{0x00000071, 0x80c01803},
1776 	{0x00000072, 0x00004004},
1777 	{0x00000073, 0x00000100},
1778 	{0x00000074, 0x00ff0000},
1779 	{0x00000075, 0x34000000},
1780 	{0x00000076, 0x08000014},
1781 	{0x00000077, 0x00cc08ec},
1782 	{0x00000078, 0x00000400},
1783 	{0x00000079, 0x00000000},
1784 	{0x0000007a, 0x04090000},
1785 	{0x0000007c, 0x00000000},
1786 	{0x0000007e, 0x4408a8e8},
1787 	{0x0000007f, 0x00000304},
1788 	{0x00000080, 0x00000000},
1789 	{0x00000082, 0x00000001},
1790 	{0x00000083, 0x00000002},
1791 	{0x00000084, 0xf3e4f400},
1792 	{0x00000085, 0x052024e3},
1793 	{0x00000087, 0x00000000},
1794 	{0x00000088, 0x01000000},
1795 	{0x0000008a, 0x1c0a0000},
1796 	{0x0000008b, 0xff010000},
1797 	{0x0000008d, 0xffffefff},
1798 	{0x0000008e, 0xfff3efff},
1799 	{0x0000008f, 0xfff3efbf},
1800 	{0x00000092, 0xf7ffffff},
1801 	{0x00000093, 0xffffff7f},
1802 	{0x00000095, 0x00101101},
1803 	{0x00000096, 0x00000fff},
1804 	{0x00000097, 0x00116fff},
1805 	{0x00000098, 0x60010000},
1806 	{0x00000099, 0x10010000},
1807 	{0x0000009a, 0x00006000},
1808 	{0x0000009b, 0x00001000},
1809 	{0x0000009f, 0x00b48000}
1810 };
1811 
/* number of {index, data} rows in hawaii_io_mc_regs */
1812 #define HAWAII_IO_MC_REGS_SIZE 22
1813 
/*
 * MC io debug table used by ci_mc_load_microcode() on CHIP_HAWAII when
 * the legacy (non new_fw) firmware image is loaded.  For each row the
 * first word is written to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA.
 */
1814 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1815 {
1816 	{0x0000007d, 0x40000000},
1817 	{0x0000007e, 0x40180304},
1818 	{0x0000007f, 0x0000ff00},
1819 	{0x00000081, 0x00000000},
1820 	{0x00000083, 0x00000800},
1821 	{0x00000086, 0x00000000},
1822 	{0x00000087, 0x00000100},
1823 	{0x00000088, 0x00020100},
1824 	{0x00000089, 0x00000000},
1825 	{0x0000008b, 0x00040000},
1826 	{0x0000008c, 0x00000100},
1827 	{0x0000008e, 0xff010000},
1828 	{0x00000090, 0xffffefff},
1829 	{0x00000091, 0xfff3efff},
1830 	{0x00000092, 0xfff3efbf},
1831 	{0x00000093, 0xf7ffffff},
1832 	{0x00000094, 0xffffff7f},
1833 	{0x00000095, 0x00000fff},
1834 	{0x00000096, 0x00116fff},
1835 	{0x00000097, 0x60010000},
1836 	{0x00000098, 0x10010000},
1837 	{0x0000009f, 0x00c79000}
1838 };
1839 
1840 
1841 /**
1842  * cik_srbm_select - select specific register instances
1843  *
1844  * @rdev: radeon_device pointer
1845  * @me: selected ME (micro engine)
1846  * @pipe: pipe
1847  * @queue: queue
1848  * @vmid: VMID
1849  *
1850  * Switches the currently active registers instances.  Some
1851  * registers are instanced per VMID, others are instanced per
1852  * me/pipe/queue combination.
1853  */
1854 static void cik_srbm_select(struct radeon_device *rdev,
1855 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1856 {
1857 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1858 			     MEID(me & 0x3) |
1859 			     VMID(vmid & 0xf) |
1860 			     QUEUEID(queue & 0x7));
1861 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1862 }
1863 
1864 /* ucode loading */
1865 /**
1866  * ci_mc_load_microcode - load MC ucode into the hw
1867  *
1868  * @rdev: radeon_device pointer
1869  *
1870  * Load the GDDR MC ucode into the hw (CIK).
1871  * Returns 0 on success, error on failure.
1872  */
1873 int ci_mc_load_microcode(struct radeon_device *rdev)
1874 {
1875 	const __be32 *fw_data = NULL;
1876 	const __le32 *new_fw_data = NULL;
1877 	u32 running, tmp;
1878 	u32 *io_mc_regs = NULL;
1879 	const __le32 *new_io_mc_regs = NULL;
1880 	int i, regs_size, ucode_size;
1881 
1882 	if (!rdev->mc_fw)
1883 		return -EINVAL;
1884 
1885 	if (rdev->new_fw) {
1886 		const struct mc_firmware_header_v1_0 *hdr =
1887 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1888 
1889 		radeon_ucode_print_mc_hdr(&hdr->header);
1890 
1891 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1892 		new_io_mc_regs = (const __le32 *)
1893 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1894 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1895 		new_fw_data = (const __le32 *)
1896 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1897 	} else {
1898 		ucode_size = rdev->mc_fw->size / 4;
1899 
1900 		switch (rdev->family) {
1901 		case CHIP_BONAIRE:
1902 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1903 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1904 			break;
1905 		case CHIP_HAWAII:
1906 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1907 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1908 			break;
1909 		default:
1910 			return -EINVAL;
1911 		}
1912 		fw_data = (const __be32 *)rdev->mc_fw->data;
1913 	}
1914 
1915 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1916 
1917 	if (running == 0) {
1918 		/* reset the engine and set to writable */
1919 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921 
1922 		/* load mc io regs */
1923 		for (i = 0; i < regs_size; i++) {
1924 			if (rdev->new_fw) {
1925 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927 			} else {
1928 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930 			}
1931 		}
1932 
1933 		tmp = RREG32(MC_SEQ_MISC0);
1934 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939 		}
1940 
1941 		/* load the MC ucode */
1942 		for (i = 0; i < ucode_size; i++) {
1943 			if (rdev->new_fw)
1944 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945 			else
1946 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947 		}
1948 
1949 		/* put the engine back into the active state */
1950 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953 
1954 		/* wait for training to complete */
1955 		for (i = 0; i < rdev->usec_timeout; i++) {
1956 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957 				break;
1958 			udelay(1);
1959 		}
1960 		for (i = 0; i < rdev->usec_timeout; i++) {
1961 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962 				break;
1963 			udelay(1);
1964 		}
1965 	}
1966 
1967 	return 0;
1968 }
1969 
1970 /**
1971  * cik_init_microcode - load ucode images from disk
1972  *
1973  * @rdev: radeon_device pointer
1974  *
1975  * Use the firmware interface to load the ucode images into
1976  * the driver (not loaded into hw).
1977  * Returns 0 on success, error on failure.
1978  */
1979 static int cik_init_microcode(struct radeon_device *rdev)
1980 {
1981 	const char *chip_name;
1982 	const char *new_chip_name;
1983 	size_t pfp_req_size, me_req_size, ce_req_size,
1984 		mec_req_size, rlc_req_size, mc_req_size = 0,
1985 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1986 	char fw_name[30];
1987 	int new_fw = 0;
1988 	int err;
1989 	int num_fw;
1990 	bool new_smc = false;
1991 
1992 	DRM_DEBUG("\n");
1993 
1994 	switch (rdev->family) {
1995 	case CHIP_BONAIRE:
1996 		chip_name = "BONAIRE";
1997 		if ((rdev->pdev->revision == 0x80) ||
1998 		    (rdev->pdev->revision == 0x81) ||
1999 		    (rdev->pdev->device == 0x665f))
2000 			new_smc = true;
2001 		new_chip_name = "bonaire";
2002 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2003 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2004 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2005 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2006 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2007 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2008 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2009 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2010 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2011 		num_fw = 8;
2012 		break;
2013 	case CHIP_HAWAII:
2014 		chip_name = "HAWAII";
2015 		if (rdev->pdev->revision == 0x80)
2016 			new_smc = true;
2017 		new_chip_name = "hawaii";
2018 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2019 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2020 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2021 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2022 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2023 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2024 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2025 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2026 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2027 		num_fw = 8;
2028 		break;
2029 	case CHIP_KAVERI:
2030 		chip_name = "KAVERI";
2031 		new_chip_name = "kaveri";
2032 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2033 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2034 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2035 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2036 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2037 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2038 		num_fw = 7;
2039 		break;
2040 	case CHIP_KABINI:
2041 		chip_name = "KABINI";
2042 		new_chip_name = "kabini";
2043 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2044 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2045 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2046 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2047 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2048 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2049 		num_fw = 6;
2050 		break;
2051 	case CHIP_MULLINS:
2052 		chip_name = "MULLINS";
2053 		new_chip_name = "mullins";
2054 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2055 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2056 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2057 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2058 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2059 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2060 		num_fw = 6;
2061 		break;
2062 	default: BUG();
2063 	}
2064 
2065 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2066 
2067 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2068 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069 	if (err) {
2070 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2071 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2072 		if (err)
2073 			goto out;
2074 		if (rdev->pfp_fw->size != pfp_req_size) {
2075 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2076 			       rdev->pfp_fw->size, fw_name);
2077 			err = -EINVAL;
2078 			goto out;
2079 		}
2080 	} else {
2081 		err = radeon_ucode_validate(rdev->pfp_fw);
2082 		if (err) {
2083 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2084 			       fw_name);
2085 			goto out;
2086 		} else {
2087 			new_fw++;
2088 		}
2089 	}
2090 
2091 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2092 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2093 	if (err) {
2094 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2095 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2096 		if (err)
2097 			goto out;
2098 		if (rdev->me_fw->size != me_req_size) {
2099 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100 			       rdev->me_fw->size, fw_name);
2101 			err = -EINVAL;
2102 		}
2103 	} else {
2104 		err = radeon_ucode_validate(rdev->me_fw);
2105 		if (err) {
2106 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->ce_fw->size != ce_req_size) {
2122 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2123 			       rdev->ce_fw->size, fw_name);
2124 			err = -EINVAL;
2125 		}
2126 	} else {
2127 		err = radeon_ucode_validate(rdev->ce_fw);
2128 		if (err) {
2129 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2130 			       fw_name);
2131 			goto out;
2132 		} else {
2133 			new_fw++;
2134 		}
2135 	}
2136 
2137 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2138 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2139 	if (err) {
2140 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2141 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142 		if (err)
2143 			goto out;
2144 		if (rdev->mec_fw->size != mec_req_size) {
2145 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2146 			       rdev->mec_fw->size, fw_name);
2147 			err = -EINVAL;
2148 		}
2149 	} else {
2150 		err = radeon_ucode_validate(rdev->mec_fw);
2151 		if (err) {
2152 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2153 			       fw_name);
2154 			goto out;
2155 		} else {
2156 			new_fw++;
2157 		}
2158 	}
2159 
2160 	if (rdev->family == CHIP_KAVERI) {
2161 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2162 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2163 		if (err) {
2164 			goto out;
2165 		} else {
2166 			err = radeon_ucode_validate(rdev->mec2_fw);
2167 			if (err) {
2168 				goto out;
2169 			} else {
2170 				new_fw++;
2171 			}
2172 		}
2173 	}
2174 
2175 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2176 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2177 	if (err) {
2178 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2179 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2180 		if (err)
2181 			goto out;
2182 		if (rdev->rlc_fw->size != rlc_req_size) {
2183 			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2184 			       rdev->rlc_fw->size, fw_name);
2185 			err = -EINVAL;
2186 		}
2187 	} else {
2188 		err = radeon_ucode_validate(rdev->rlc_fw);
2189 		if (err) {
2190 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2191 			       fw_name);
2192 			goto out;
2193 		} else {
2194 			new_fw++;
2195 		}
2196 	}
2197 
2198 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2199 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2200 	if (err) {
2201 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2202 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2203 		if (err)
2204 			goto out;
2205 		if (rdev->sdma_fw->size != sdma_req_size) {
2206 			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2207 			       rdev->sdma_fw->size, fw_name);
2208 			err = -EINVAL;
2209 		}
2210 	} else {
2211 		err = radeon_ucode_validate(rdev->sdma_fw);
2212 		if (err) {
2213 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2214 			       fw_name);
2215 			goto out;
2216 		} else {
2217 			new_fw++;
2218 		}
2219 	}
2220 
2221 	/* No SMC, MC ucode on APUs */
2222 	if (!(rdev->flags & RADEON_IS_IGP)) {
2223 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2224 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2225 		if (err) {
2226 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2227 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2228 			if (err) {
2229 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2230 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2231 				if (err)
2232 					goto out;
2233 			}
2234 			if ((rdev->mc_fw->size != mc_req_size) &&
2235 			    (rdev->mc_fw->size != mc2_req_size)){
2236 				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2237 				       rdev->mc_fw->size, fw_name);
2238 				err = -EINVAL;
2239 			}
2240 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2241 		} else {
2242 			err = radeon_ucode_validate(rdev->mc_fw);
2243 			if (err) {
2244 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2245 				       fw_name);
2246 				goto out;
2247 			} else {
2248 				new_fw++;
2249 			}
2250 		}
2251 
2252 		if (new_smc)
2253 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2254 		else
2255 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2256 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2257 		if (err) {
2258 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2259 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2260 			if (err) {
2261 				pr_err("smc: error loading firmware \"%s\"\n",
2262 				       fw_name);
2263 				release_firmware(rdev->smc_fw);
2264 				rdev->smc_fw = NULL;
2265 				err = 0;
2266 			} else if (rdev->smc_fw->size != smc_req_size) {
2267 				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2268 				       rdev->smc_fw->size, fw_name);
2269 				err = -EINVAL;
2270 			}
2271 		} else {
2272 			err = radeon_ucode_validate(rdev->smc_fw);
2273 			if (err) {
2274 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2275 				       fw_name);
2276 				goto out;
2277 			} else {
2278 				new_fw++;
2279 			}
2280 		}
2281 	}
2282 
2283 	if (new_fw == 0) {
2284 		rdev->new_fw = false;
2285 	} else if (new_fw < num_fw) {
2286 		pr_err("ci_fw: mixing new and old firmware!\n");
2287 		err = -EINVAL;
2288 	} else {
2289 		rdev->new_fw = true;
2290 	}
2291 
2292 out:
2293 	if (err) {
2294 		if (err != -EINVAL)
2295 			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2296 			       fw_name);
2297 		release_firmware(rdev->pfp_fw);
2298 		rdev->pfp_fw = NULL;
2299 		release_firmware(rdev->me_fw);
2300 		rdev->me_fw = NULL;
2301 		release_firmware(rdev->ce_fw);
2302 		rdev->ce_fw = NULL;
2303 		release_firmware(rdev->mec_fw);
2304 		rdev->mec_fw = NULL;
2305 		release_firmware(rdev->mec2_fw);
2306 		rdev->mec2_fw = NULL;
2307 		release_firmware(rdev->rlc_fw);
2308 		rdev->rlc_fw = NULL;
2309 		release_firmware(rdev->sdma_fw);
2310 		rdev->sdma_fw = NULL;
2311 		release_firmware(rdev->mc_fw);
2312 		rdev->mc_fw = NULL;
2313 		release_firmware(rdev->smc_fw);
2314 		rdev->smc_fw = NULL;
2315 	}
2316 	return err;
2317 }
2318 
2319 /*
2320  * Core functions
2321  */
2322 /**
2323  * cik_tiling_mode_table_init - init the hw tiling table
2324  *
2325  * @rdev: radeon_device pointer
2326  *
2327  * Starting with SI, the tiling setup is done globally in a
2328  * set of 32 tiling modes.  Rather than selecting each set of
2329  * parameters per surface as on older asics, we just select
2330  * which index in the tiling table we want to use, and the
2331  * surface uses those parameters (CIK).
2332  */
2333 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2334 {
2335 	u32 *tile = rdev->config.cik.tile_mode_array;
2336 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2337 	const u32 num_tile_mode_states =
2338 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2339 	const u32 num_secondary_tile_mode_states =
2340 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2341 	u32 reg_offset, split_equal_to_row_size;
2342 	u32 num_pipe_configs;
2343 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2344 		rdev->config.cik.max_shader_engines;
2345 
2346 	switch (rdev->config.cik.mem_row_size_in_kb) {
2347 	case 1:
2348 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2349 		break;
2350 	case 2:
2351 	default:
2352 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2353 		break;
2354 	case 4:
2355 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2356 		break;
2357 	}
2358 
2359 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2360 	if (num_pipe_configs > 8)
2361 		num_pipe_configs = 16;
2362 
2363 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2364 		tile[reg_offset] = 0;
2365 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2366 		macrotile[reg_offset] = 0;
2367 
2368 	switch(num_pipe_configs) {
2369 	case 16:
2370 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2374 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2378 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2382 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2386 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 			   TILE_SPLIT(split_equal_to_row_size));
2390 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2398 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 			   TILE_SPLIT(split_equal_to_row_size));
2401 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2402 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2403 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2406 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2421 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2436 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448 
2449 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 			   NUM_BANKS(ADDR_SURF_16_BANK));
2453 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456 			   NUM_BANKS(ADDR_SURF_16_BANK));
2457 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460 			   NUM_BANKS(ADDR_SURF_16_BANK));
2461 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 			   NUM_BANKS(ADDR_SURF_16_BANK));
2465 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 			   NUM_BANKS(ADDR_SURF_8_BANK));
2469 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472 			   NUM_BANKS(ADDR_SURF_4_BANK));
2473 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476 			   NUM_BANKS(ADDR_SURF_2_BANK));
2477 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480 			   NUM_BANKS(ADDR_SURF_16_BANK));
2481 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484 			   NUM_BANKS(ADDR_SURF_16_BANK));
2485 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 			    NUM_BANKS(ADDR_SURF_16_BANK));
2489 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492 			    NUM_BANKS(ADDR_SURF_8_BANK));
2493 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496 			    NUM_BANKS(ADDR_SURF_4_BANK));
2497 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 			    NUM_BANKS(ADDR_SURF_2_BANK));
2501 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504 			    NUM_BANKS(ADDR_SURF_2_BANK));
2505 
2506 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2508 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2509 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2510 		break;
2511 
2512 	case 8:
2513 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2517 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2521 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2525 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2529 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532 			   TILE_SPLIT(split_equal_to_row_size));
2533 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2541 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 			   TILE_SPLIT(split_equal_to_row_size));
2544 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2549 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2558 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2564 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2579 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591 
2592 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2594 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2595 				NUM_BANKS(ADDR_SURF_16_BANK));
2596 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599 				NUM_BANKS(ADDR_SURF_16_BANK));
2600 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603 				NUM_BANKS(ADDR_SURF_16_BANK));
2604 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2607 				NUM_BANKS(ADDR_SURF_16_BANK));
2608 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611 				NUM_BANKS(ADDR_SURF_8_BANK));
2612 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615 				NUM_BANKS(ADDR_SURF_4_BANK));
2616 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619 				NUM_BANKS(ADDR_SURF_2_BANK));
2620 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2622 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623 				NUM_BANKS(ADDR_SURF_16_BANK));
2624 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2627 				NUM_BANKS(ADDR_SURF_16_BANK));
2628 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631 				NUM_BANKS(ADDR_SURF_16_BANK));
2632 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2635 				NUM_BANKS(ADDR_SURF_16_BANK));
2636 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639 				NUM_BANKS(ADDR_SURF_8_BANK));
2640 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643 				NUM_BANKS(ADDR_SURF_4_BANK));
2644 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2647 				NUM_BANKS(ADDR_SURF_2_BANK));
2648 
2649 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2650 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2651 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2652 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2653 		break;
2654 
2655 	case 4:
2656 		if (num_rbs == 4) {
2657 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2661 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2665 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2673 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2675 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676 			   TILE_SPLIT(split_equal_to_row_size));
2677 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2685 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 			   TILE_SPLIT(split_equal_to_row_size));
2688 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2693 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2703 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2708 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2723 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2724 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735 
2736 		} else if (num_rbs < 4) {
2737 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2741 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2745 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2749 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2753 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2755 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756 			   TILE_SPLIT(split_equal_to_row_size));
2757 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2760 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2765 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 			   TILE_SPLIT(split_equal_to_row_size));
2768 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2769 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2770 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2773 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2782 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2788 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2803 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 		}
2816 
2817 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820 				NUM_BANKS(ADDR_SURF_16_BANK));
2821 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824 				NUM_BANKS(ADDR_SURF_16_BANK));
2825 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828 				NUM_BANKS(ADDR_SURF_16_BANK));
2829 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832 				NUM_BANKS(ADDR_SURF_16_BANK));
2833 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836 				NUM_BANKS(ADDR_SURF_16_BANK));
2837 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840 				NUM_BANKS(ADDR_SURF_8_BANK));
2841 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844 				NUM_BANKS(ADDR_SURF_4_BANK));
2845 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2847 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848 				NUM_BANKS(ADDR_SURF_16_BANK));
2849 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2850 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852 				NUM_BANKS(ADDR_SURF_16_BANK));
2853 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856 				NUM_BANKS(ADDR_SURF_16_BANK));
2857 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860 				NUM_BANKS(ADDR_SURF_16_BANK));
2861 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864 				NUM_BANKS(ADDR_SURF_16_BANK));
2865 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2868 				NUM_BANKS(ADDR_SURF_8_BANK));
2869 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2872 				NUM_BANKS(ADDR_SURF_4_BANK));
2873 
2874 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2875 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2876 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2877 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2878 		break;
2879 
2880 	case 2:
2881 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883 			   PIPE_CONFIG(ADDR_SURF_P2) |
2884 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2885 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887 			   PIPE_CONFIG(ADDR_SURF_P2) |
2888 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891 			   PIPE_CONFIG(ADDR_SURF_P2) |
2892 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895 			   PIPE_CONFIG(ADDR_SURF_P2) |
2896 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2897 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899 			   PIPE_CONFIG(ADDR_SURF_P2) |
2900 			   TILE_SPLIT(split_equal_to_row_size));
2901 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902 			   PIPE_CONFIG(ADDR_SURF_P2) |
2903 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 			   PIPE_CONFIG(ADDR_SURF_P2) |
2907 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2909 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910 			   PIPE_CONFIG(ADDR_SURF_P2) |
2911 			   TILE_SPLIT(split_equal_to_row_size));
2912 		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2913 			   PIPE_CONFIG(ADDR_SURF_P2);
2914 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916 			   PIPE_CONFIG(ADDR_SURF_P2));
2917 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919 			    PIPE_CONFIG(ADDR_SURF_P2) |
2920 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923 			    PIPE_CONFIG(ADDR_SURF_P2) |
2924 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927 			    PIPE_CONFIG(ADDR_SURF_P2) |
2928 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930 			    PIPE_CONFIG(ADDR_SURF_P2) |
2931 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2932 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934 			    PIPE_CONFIG(ADDR_SURF_P2) |
2935 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938 			    PIPE_CONFIG(ADDR_SURF_P2) |
2939 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942 			    PIPE_CONFIG(ADDR_SURF_P2) |
2943 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946 			    PIPE_CONFIG(ADDR_SURF_P2));
2947 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949 			    PIPE_CONFIG(ADDR_SURF_P2) |
2950 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953 			    PIPE_CONFIG(ADDR_SURF_P2) |
2954 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957 			    PIPE_CONFIG(ADDR_SURF_P2) |
2958 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 
2960 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963 				NUM_BANKS(ADDR_SURF_16_BANK));
2964 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2965 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967 				NUM_BANKS(ADDR_SURF_16_BANK));
2968 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 				NUM_BANKS(ADDR_SURF_16_BANK));
2972 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975 				NUM_BANKS(ADDR_SURF_16_BANK));
2976 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979 				NUM_BANKS(ADDR_SURF_16_BANK));
2980 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983 				NUM_BANKS(ADDR_SURF_16_BANK));
2984 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987 				NUM_BANKS(ADDR_SURF_8_BANK));
2988 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2990 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 				NUM_BANKS(ADDR_SURF_16_BANK));
2992 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2993 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 				NUM_BANKS(ADDR_SURF_16_BANK));
2996 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999 				NUM_BANKS(ADDR_SURF_16_BANK));
3000 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 				NUM_BANKS(ADDR_SURF_16_BANK));
3004 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3006 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007 				NUM_BANKS(ADDR_SURF_16_BANK));
3008 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011 				NUM_BANKS(ADDR_SURF_16_BANK));
3012 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3015 				NUM_BANKS(ADDR_SURF_8_BANK));
3016 
3017 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3018 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3019 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3020 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3021 		break;
3022 
3023 	default:
3024 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3025 	}
3026 }
3027 
3028 /**
3029  * cik_select_se_sh - select which SE, SH to address
3030  *
3031  * @rdev: radeon_device pointer
3032  * @se_num: shader engine to address
3033  * @sh_num: sh block to address
3034  *
3035  * Select which SE, SH combinations to address. Certain
3036  * registers are instanced per SE or SH.  0xffffffff means
3037  * broadcast to all SEs or SHs (CIK).
3038  */
3039 static void cik_select_se_sh(struct radeon_device *rdev,
3040 			     u32 se_num, u32 sh_num)
3041 {
3042 	u32 data = INSTANCE_BROADCAST_WRITES;
3043 
3044 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3045 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3046 	else if (se_num == 0xffffffff)
3047 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3048 	else if (sh_num == 0xffffffff)
3049 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3050 	else
3051 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3052 	WREG32(GRBM_GFX_INDEX, data);
3053 }
3054 
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: number of low-order bits to set
 *
 * Builds a mask with the lowest @bit_width bits set (CIK).  Widths of
 * 32 or more yield a mask with all 32 bits set.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* Shifting a 32-bit value by 32 or more is undefined, so saturate. */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3073 
3074 /**
3075  * cik_get_rb_disabled - computes the mask of disabled RBs
3076  *
3077  * @rdev: radeon_device pointer
3078  * @max_rb_num: max RBs (render backends) for the asic
3079  * @se_num: number of SEs (shader engines) for the asic
3080  * @sh_per_se: number of SH blocks per SE for the asic
3081  *
3082  * Calculates the bitmask of disabled RBs (CIK).
3083  * Returns the disabled RB bitmask.
3084  */
3085 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3086 			      u32 max_rb_num_per_se,
3087 			      u32 sh_per_se)
3088 {
3089 	u32 data, mask;
3090 
3091 	data = RREG32(CC_RB_BACKEND_DISABLE);
3092 	if (data & 1)
3093 		data &= BACKEND_DISABLE_MASK;
3094 	else
3095 		data = 0;
3096 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3097 
3098 	data >>= BACKEND_DISABLE_SHIFT;
3099 
3100 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3101 
3102 	return data & mask;
3103 }
3104 
3105 /**
3106  * cik_setup_rb - setup the RBs on the asic
3107  *
3108  * @rdev: radeon_device pointer
3109  * @se_num: number of SEs (shader engines) for the asic
3110  * @sh_per_se: number of SH blocks per SE for the asic
3111  * @max_rb_num: max RBs (render backends) for the asic
3112  *
3113  * Configures per-SE/SH RB registers (CIK).
3114  */
3115 static void cik_setup_rb(struct radeon_device *rdev,
3116 			 u32 se_num, u32 sh_per_se,
3117 			 u32 max_rb_num_per_se)
3118 {
3119 	int i, j;
3120 	u32 data, mask;
3121 	u32 disabled_rbs = 0;
3122 	u32 enabled_rbs = 0;
3123 
3124 	for (i = 0; i < se_num; i++) {
3125 		for (j = 0; j < sh_per_se; j++) {
3126 			cik_select_se_sh(rdev, i, j);
3127 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3128 			if (rdev->family == CHIP_HAWAII)
3129 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3130 			else
3131 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3132 		}
3133 	}
3134 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3135 
3136 	mask = 1;
3137 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3138 		if (!(disabled_rbs & mask))
3139 			enabled_rbs |= mask;
3140 		mask <<= 1;
3141 	}
3142 
3143 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3144 
3145 	for (i = 0; i < se_num; i++) {
3146 		cik_select_se_sh(rdev, i, 0xffffffff);
3147 		data = 0;
3148 		for (j = 0; j < sh_per_se; j++) {
3149 			switch (enabled_rbs & 3) {
3150 			case 0:
3151 				if (j == 0)
3152 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3153 				else
3154 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3155 				break;
3156 			case 1:
3157 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3158 				break;
3159 			case 2:
3160 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3161 				break;
3162 			case 3:
3163 			default:
3164 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3165 				break;
3166 			}
3167 			enabled_rbs >>= 2;
3168 		}
3169 		WREG32(PA_SC_RASTER_CONFIG, data);
3170 	}
3171 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3172 }
3173 
3174 /**
3175  * cik_gpu_init - setup the 3D engine
3176  *
3177  * @rdev: radeon_device pointer
3178  *
3179  * Configures the 3D engine and tiling configuration
3180  * registers so that the 3D engine is usable.
3181  */
3182 static void cik_gpu_init(struct radeon_device *rdev)
3183 {
3184 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3185 	u32 mc_shared_chmap, mc_arb_ramcfg;
3186 	u32 hdp_host_path_cntl;
3187 	u32 tmp;
3188 	int i, j;
3189 
3190 	switch (rdev->family) {
3191 	case CHIP_BONAIRE:
3192 		rdev->config.cik.max_shader_engines = 2;
3193 		rdev->config.cik.max_tile_pipes = 4;
3194 		rdev->config.cik.max_cu_per_sh = 7;
3195 		rdev->config.cik.max_sh_per_se = 1;
3196 		rdev->config.cik.max_backends_per_se = 2;
3197 		rdev->config.cik.max_texture_channel_caches = 4;
3198 		rdev->config.cik.max_gprs = 256;
3199 		rdev->config.cik.max_gs_threads = 32;
3200 		rdev->config.cik.max_hw_contexts = 8;
3201 
3202 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3203 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3204 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3205 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3206 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3207 		break;
3208 	case CHIP_HAWAII:
3209 		rdev->config.cik.max_shader_engines = 4;
3210 		rdev->config.cik.max_tile_pipes = 16;
3211 		rdev->config.cik.max_cu_per_sh = 11;
3212 		rdev->config.cik.max_sh_per_se = 1;
3213 		rdev->config.cik.max_backends_per_se = 4;
3214 		rdev->config.cik.max_texture_channel_caches = 16;
3215 		rdev->config.cik.max_gprs = 256;
3216 		rdev->config.cik.max_gs_threads = 32;
3217 		rdev->config.cik.max_hw_contexts = 8;
3218 
3219 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3220 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3221 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3222 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3223 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3224 		break;
3225 	case CHIP_KAVERI:
3226 		rdev->config.cik.max_shader_engines = 1;
3227 		rdev->config.cik.max_tile_pipes = 4;
3228 		rdev->config.cik.max_cu_per_sh = 8;
3229 		rdev->config.cik.max_backends_per_se = 2;
3230 		rdev->config.cik.max_sh_per_se = 1;
3231 		rdev->config.cik.max_texture_channel_caches = 4;
3232 		rdev->config.cik.max_gprs = 256;
3233 		rdev->config.cik.max_gs_threads = 16;
3234 		rdev->config.cik.max_hw_contexts = 8;
3235 
3236 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241 		break;
3242 	case CHIP_KABINI:
3243 	case CHIP_MULLINS:
3244 	default:
3245 		rdev->config.cik.max_shader_engines = 1;
3246 		rdev->config.cik.max_tile_pipes = 2;
3247 		rdev->config.cik.max_cu_per_sh = 2;
3248 		rdev->config.cik.max_sh_per_se = 1;
3249 		rdev->config.cik.max_backends_per_se = 1;
3250 		rdev->config.cik.max_texture_channel_caches = 2;
3251 		rdev->config.cik.max_gprs = 256;
3252 		rdev->config.cik.max_gs_threads = 16;
3253 		rdev->config.cik.max_hw_contexts = 8;
3254 
3255 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3256 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3257 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3258 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3259 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3260 		break;
3261 	}
3262 
3263 	/* Initialize HDP */
3264 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3265 		WREG32((0x2c14 + j), 0x00000000);
3266 		WREG32((0x2c18 + j), 0x00000000);
3267 		WREG32((0x2c1c + j), 0x00000000);
3268 		WREG32((0x2c20 + j), 0x00000000);
3269 		WREG32((0x2c24 + j), 0x00000000);
3270 	}
3271 
3272 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3273 	WREG32(SRBM_INT_CNTL, 0x1);
3274 	WREG32(SRBM_INT_ACK, 0x1);
3275 
3276 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3277 
3278 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3279 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3280 
3281 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3282 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3283 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3284 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3285 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3286 		rdev->config.cik.mem_row_size_in_kb = 4;
3287 	/* XXX use MC settings? */
3288 	rdev->config.cik.shader_engine_tile_size = 32;
3289 	rdev->config.cik.num_gpus = 1;
3290 	rdev->config.cik.multi_gpu_tile_size = 64;
3291 
3292 	/* fix up row size */
3293 	gb_addr_config &= ~ROW_SIZE_MASK;
3294 	switch (rdev->config.cik.mem_row_size_in_kb) {
3295 	case 1:
3296 	default:
3297 		gb_addr_config |= ROW_SIZE(0);
3298 		break;
3299 	case 2:
3300 		gb_addr_config |= ROW_SIZE(1);
3301 		break;
3302 	case 4:
3303 		gb_addr_config |= ROW_SIZE(2);
3304 		break;
3305 	}
3306 
3307 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3308 	 * not have bank info, so create a custom tiling dword.
3309 	 * bits 3:0   num_pipes
3310 	 * bits 7:4   num_banks
3311 	 * bits 11:8  group_size
3312 	 * bits 15:12 row_size
3313 	 */
3314 	rdev->config.cik.tile_config = 0;
3315 	switch (rdev->config.cik.num_tile_pipes) {
3316 	case 1:
3317 		rdev->config.cik.tile_config |= (0 << 0);
3318 		break;
3319 	case 2:
3320 		rdev->config.cik.tile_config |= (1 << 0);
3321 		break;
3322 	case 4:
3323 		rdev->config.cik.tile_config |= (2 << 0);
3324 		break;
3325 	case 8:
3326 	default:
3327 		/* XXX what about 12? */
3328 		rdev->config.cik.tile_config |= (3 << 0);
3329 		break;
3330 	}
3331 	rdev->config.cik.tile_config |=
3332 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3333 	rdev->config.cik.tile_config |=
3334 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3335 	rdev->config.cik.tile_config |=
3336 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3337 
3338 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3339 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3340 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3341 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3342 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3343 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3344 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3345 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3346 
3347 	cik_tiling_mode_table_init(rdev);
3348 
3349 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3350 		     rdev->config.cik.max_sh_per_se,
3351 		     rdev->config.cik.max_backends_per_se);
3352 
3353 	rdev->config.cik.active_cus = 0;
3354 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3355 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3356 			rdev->config.cik.active_cus +=
3357 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3358 		}
3359 	}
3360 
3361 	/* set HW defaults for 3D engine */
3362 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3363 
3364 	WREG32(SX_DEBUG_1, 0x20);
3365 
3366 	WREG32(TA_CNTL_AUX, 0x00010000);
3367 
3368 	tmp = RREG32(SPI_CONFIG_CNTL);
3369 	tmp |= 0x03000000;
3370 	WREG32(SPI_CONFIG_CNTL, tmp);
3371 
3372 	WREG32(SQ_CONFIG, 1);
3373 
3374 	WREG32(DB_DEBUG, 0);
3375 
3376 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3377 	tmp |= 0x00000400;
3378 	WREG32(DB_DEBUG2, tmp);
3379 
3380 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3381 	tmp |= 0x00020200;
3382 	WREG32(DB_DEBUG3, tmp);
3383 
3384 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3385 	tmp |= 0x00018208;
3386 	WREG32(CB_HW_CONTROL, tmp);
3387 
3388 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3389 
3390 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3391 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3392 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3393 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3394 
3395 	WREG32(VGT_NUM_INSTANCES, 1);
3396 
3397 	WREG32(CP_PERFMON_CNTL, 0);
3398 
3399 	WREG32(SQ_CONFIG, 0);
3400 
3401 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3402 					  FORCE_EOV_MAX_REZ_CNT(255)));
3403 
3404 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3405 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3406 
3407 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3408 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3409 
3410 	tmp = RREG32(HDP_MISC_CNTL);
3411 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3412 	WREG32(HDP_MISC_CNTL, tmp);
3413 
3414 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3415 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3416 
3417 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3418 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3419 
3420 	udelay(50);
3421 }
3422 
3423 /*
 * GPU scratch register helper functions.
3425  */
3426 /**
3427  * cik_scratch_init - setup driver info for CP scratch regs
3428  *
3429  * @rdev: radeon_device pointer
3430  *
3431  * Set up the number and offset of the CP scratch registers.
3432  * NOTE: use of CP scratch registers is a legacy inferface and
3433  * is not used by default on newer asics (r6xx+).  On newer asics,
3434  * memory buffers are used for fences rather than scratch regs.
3435  */
3436 static void cik_scratch_init(struct radeon_device *rdev)
3437 {
3438 	int i;
3439 
3440 	rdev->scratch.num_reg = 7;
3441 	rdev->scratch.reg_base = SCRATCH_REG0;
3442 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3443 		rdev->scratch.free[i] = true;
3444 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3445 	}
3446 }
3447 
3448 /**
3449  * cik_ring_test - basic gfx ring test
3450  *
3451  * @rdev: radeon_device pointer
3452  * @ring: radeon_ring structure holding ring information
3453  *
3454  * Allocate a scratch register and write to it using the gfx ring (CIK).
3455  * Provides a basic gfx ring test to verify that the ring is working.
3456  * Used by cik_cp_gfx_resume();
3457  * Returns 0 on success, error on failure.
3458  */
3459 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3460 {
3461 	uint32_t scratch;
3462 	uint32_t tmp = 0;
3463 	unsigned i;
3464 	int r;
3465 
3466 	r = radeon_scratch_get(rdev, &scratch);
3467 	if (r) {
3468 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3469 		return r;
3470 	}
3471 	WREG32(scratch, 0xCAFEDEAD);
3472 	r = radeon_ring_lock(rdev, ring, 3);
3473 	if (r) {
3474 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3475 		radeon_scratch_free(rdev, scratch);
3476 		return r;
3477 	}
3478 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3479 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3480 	radeon_ring_write(ring, 0xDEADBEEF);
3481 	radeon_ring_unlock_commit(rdev, ring, false);
3482 
3483 	for (i = 0; i < rdev->usec_timeout; i++) {
3484 		tmp = RREG32(scratch);
3485 		if (tmp == 0xDEADBEEF)
3486 			break;
3487 		udelay(1);
3488 	}
3489 	if (i < rdev->usec_timeout) {
3490 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3491 	} else {
3492 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3493 			  ring->idx, scratch, tmp);
3494 		r = -EINVAL;
3495 	}
3496 	radeon_scratch_free(rdev, scratch);
3497 	return r;
3498 }
3499 
3500 /**
3501  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3502  *
3503  * @rdev: radeon_device pointer
3504  * @ridx: radeon ring index
3505  *
3506  * Emits an hdp flush on the cp.
3507  */
3508 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3509 				       int ridx)
3510 {
3511 	struct radeon_ring *ring = &rdev->ring[ridx];
3512 	u32 ref_and_mask;
3513 
3514 	switch (ring->idx) {
3515 	case CAYMAN_RING_TYPE_CP1_INDEX:
3516 	case CAYMAN_RING_TYPE_CP2_INDEX:
3517 	default:
3518 		switch (ring->me) {
3519 		case 0:
3520 			ref_and_mask = CP2 << ring->pipe;
3521 			break;
3522 		case 1:
3523 			ref_and_mask = CP6 << ring->pipe;
3524 			break;
3525 		default:
3526 			return;
3527 		}
3528 		break;
3529 	case RADEON_RING_TYPE_GFX_INDEX:
3530 		ref_and_mask = CP0;
3531 		break;
3532 	}
3533 
3534 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3535 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3536 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3537 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3538 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3539 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3540 	radeon_ring_write(ring, ref_and_mask);
3541 	radeon_ring_write(ring, ref_and_mask);
3542 	radeon_ring_write(ring, 0x20); /* poll interval */
3543 }
3544 
3545 /**
3546  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3547  *
3548  * @rdev: radeon_device pointer
3549  * @fence: radeon fence object
3550  *
3551  * Emits a fence sequnce number on the gfx ring and flushes
3552  * GPU caches.
3553  */
3554 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3555 			     struct radeon_fence *fence)
3556 {
3557 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3558 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3559 
3560 	/* Workaround for cache flush problems. First send a dummy EOP
3561 	 * event down the pipe with seq one below.
3562 	 */
3563 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3564 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3565 				 EOP_TC_ACTION_EN |
3566 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3567 				 EVENT_INDEX(5)));
3568 	radeon_ring_write(ring, addr & 0xfffffffc);
3569 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3570 				DATA_SEL(1) | INT_SEL(0));
3571 	radeon_ring_write(ring, fence->seq - 1);
3572 	radeon_ring_write(ring, 0);
3573 
3574 	/* Then send the real EOP event down the pipe. */
3575 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3576 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3577 				 EOP_TC_ACTION_EN |
3578 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3579 				 EVENT_INDEX(5)));
3580 	radeon_ring_write(ring, addr & 0xfffffffc);
3581 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3582 	radeon_ring_write(ring, fence->seq);
3583 	radeon_ring_write(ring, 0);
3584 }
3585 
3586 /**
3587  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3588  *
3589  * @rdev: radeon_device pointer
3590  * @fence: radeon fence object
3591  *
3592  * Emits a fence sequnce number on the compute ring and flushes
3593  * GPU caches.
3594  */
3595 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3596 				 struct radeon_fence *fence)
3597 {
3598 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3599 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3600 
3601 	/* RELEASE_MEM - flush caches, send int */
3602 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3603 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3604 				 EOP_TC_ACTION_EN |
3605 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3606 				 EVENT_INDEX(5)));
3607 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3608 	radeon_ring_write(ring, addr & 0xfffffffc);
3609 	radeon_ring_write(ring, upper_32_bits(addr));
3610 	radeon_ring_write(ring, fence->seq);
3611 	radeon_ring_write(ring, 0);
3612 }
3613 
3614 /**
3615  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3616  *
3617  * @rdev: radeon_device pointer
3618  * @ring: radeon ring buffer object
3619  * @semaphore: radeon semaphore object
3620  * @emit_wait: Is this a sempahore wait?
3621  *
3622  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3623  * from running ahead of semaphore waits.
3624  */
3625 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3626 			     struct radeon_ring *ring,
3627 			     struct radeon_semaphore *semaphore,
3628 			     bool emit_wait)
3629 {
3630 	uint64_t addr = semaphore->gpu_addr;
3631 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3632 
3633 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3634 	radeon_ring_write(ring, lower_32_bits(addr));
3635 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3636 
3637 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3638 		/* Prevent the PFP from running ahead of the semaphore wait */
3639 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3640 		radeon_ring_write(ring, 0x0);
3641 	}
3642 
3643 	return true;
3644 }
3645 
3646 /**
3647  * cik_copy_cpdma - copy pages using the CP DMA engine
3648  *
3649  * @rdev: radeon_device pointer
3650  * @src_offset: src GPU address
3651  * @dst_offset: dst GPU address
3652  * @num_gpu_pages: number of GPU pages to xfer
3653  * @resv: reservation object to sync to
3654  *
3655  * Copy GPU paging using the CP DMA engine (CIK+).
3656  * Used by the radeon ttm implementation to move pages if
3657  * registered as the asic copy callback.
3658  */
3659 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3660 				    uint64_t src_offset, uint64_t dst_offset,
3661 				    unsigned num_gpu_pages,
3662 				    struct dma_resv *resv)
3663 {
3664 	struct radeon_fence *fence;
3665 	struct radeon_sync sync;
3666 	int ring_index = rdev->asic->copy.blit_ring_index;
3667 	struct radeon_ring *ring = &rdev->ring[ring_index];
3668 	u32 size_in_bytes, cur_size_in_bytes, control;
3669 	int i, num_loops;
3670 	int r = 0;
3671 
3672 	radeon_sync_create(&sync);
3673 
3674 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3675 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3676 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3677 	if (r) {
3678 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3679 		radeon_sync_free(rdev, &sync, NULL);
3680 		return ERR_PTR(r);
3681 	}
3682 
3683 	radeon_sync_resv(rdev, &sync, resv, false);
3684 	radeon_sync_rings(rdev, &sync, ring->idx);
3685 
3686 	for (i = 0; i < num_loops; i++) {
3687 		cur_size_in_bytes = size_in_bytes;
3688 		if (cur_size_in_bytes > 0x1fffff)
3689 			cur_size_in_bytes = 0x1fffff;
3690 		size_in_bytes -= cur_size_in_bytes;
3691 		control = 0;
3692 		if (size_in_bytes == 0)
3693 			control |= PACKET3_DMA_DATA_CP_SYNC;
3694 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3695 		radeon_ring_write(ring, control);
3696 		radeon_ring_write(ring, lower_32_bits(src_offset));
3697 		radeon_ring_write(ring, upper_32_bits(src_offset));
3698 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3699 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3700 		radeon_ring_write(ring, cur_size_in_bytes);
3701 		src_offset += cur_size_in_bytes;
3702 		dst_offset += cur_size_in_bytes;
3703 	}
3704 
3705 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3706 	if (r) {
3707 		radeon_ring_unlock_undo(rdev, ring);
3708 		radeon_sync_free(rdev, &sync, NULL);
3709 		return ERR_PTR(r);
3710 	}
3711 
3712 	radeon_ring_unlock_commit(rdev, ring, false);
3713 	radeon_sync_free(rdev, &sync, fence);
3714 
3715 	return fence;
3716 }
3717 
3718 /*
3719  * IB stuff
3720  */
3721 /**
3722  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3723  *
3724  * @rdev: radeon_device pointer
3725  * @ib: radeon indirect buffer object
3726  *
3727  * Emits a DE (drawing engine) or CE (constant engine) IB
3728  * on the gfx ring.  IBs are usually generated by userspace
3729  * acceleration drivers and submitted to the kernel for
3730  * scheduling on the ring.  This function schedules the IB
3731  * on the gfx ring for execution by the GPU.
3732  */
3733 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3734 {
3735 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3736 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3737 	u32 header, control = INDIRECT_BUFFER_VALID;
3738 
3739 	if (ib->is_const_ib) {
3740 		/* set switch buffer packet before const IB */
3741 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3742 		radeon_ring_write(ring, 0);
3743 
3744 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3745 	} else {
3746 		u32 next_rptr;
3747 		if (ring->rptr_save_reg) {
3748 			next_rptr = ring->wptr + 3 + 4;
3749 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3750 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3751 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3752 			radeon_ring_write(ring, next_rptr);
3753 		} else if (rdev->wb.enabled) {
3754 			next_rptr = ring->wptr + 5 + 4;
3755 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3756 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3757 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3758 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3759 			radeon_ring_write(ring, next_rptr);
3760 		}
3761 
3762 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3763 	}
3764 
3765 	control |= ib->length_dw | (vm_id << 24);
3766 
3767 	radeon_ring_write(ring, header);
3768 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3769 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3770 	radeon_ring_write(ring, control);
3771 }
3772 
3773 /**
3774  * cik_ib_test - basic gfx ring IB test
3775  *
3776  * @rdev: radeon_device pointer
3777  * @ring: radeon_ring structure holding ring information
3778  *
3779  * Allocate an IB and execute it on the gfx ring (CIK).
3780  * Provides a basic gfx ring test to verify that IBs are working.
3781  * Returns 0 on success, error on failure.
3782  */
3783 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3784 {
3785 	struct radeon_ib ib;
3786 	uint32_t scratch;
3787 	uint32_t tmp = 0;
3788 	unsigned i;
3789 	int r;
3790 
3791 	r = radeon_scratch_get(rdev, &scratch);
3792 	if (r) {
3793 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3794 		return r;
3795 	}
3796 	WREG32(scratch, 0xCAFEDEAD);
3797 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3798 	if (r) {
3799 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3800 		radeon_scratch_free(rdev, scratch);
3801 		return r;
3802 	}
3803 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3804 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3805 	ib.ptr[2] = 0xDEADBEEF;
3806 	ib.length_dw = 3;
3807 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3808 	if (r) {
3809 		radeon_scratch_free(rdev, scratch);
3810 		radeon_ib_free(rdev, &ib);
3811 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3812 		return r;
3813 	}
3814 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3815 		RADEON_USEC_IB_TEST_TIMEOUT));
3816 	if (r < 0) {
3817 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3818 		radeon_scratch_free(rdev, scratch);
3819 		radeon_ib_free(rdev, &ib);
3820 		return r;
3821 	} else if (r == 0) {
3822 		DRM_ERROR("radeon: fence wait timed out.\n");
3823 		radeon_scratch_free(rdev, scratch);
3824 		radeon_ib_free(rdev, &ib);
3825 		return -ETIMEDOUT;
3826 	}
3827 	r = 0;
3828 	for (i = 0; i < rdev->usec_timeout; i++) {
3829 		tmp = RREG32(scratch);
3830 		if (tmp == 0xDEADBEEF)
3831 			break;
3832 		udelay(1);
3833 	}
3834 	if (i < rdev->usec_timeout) {
3835 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3836 	} else {
3837 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3838 			  scratch, tmp);
3839 		r = -EINVAL;
3840 	}
3841 	radeon_scratch_free(rdev, scratch);
3842 	radeon_ib_free(rdev, &ib);
3843 	return r;
3844 }
3845 
3846 /*
3847  * CP.
 * On CIK, gfx and compute now have independent command processors.
3849  *
3850  * GFX
3851  * Gfx consists of a single ring and can process both gfx jobs and
3852  * compute jobs.  The gfx CP consists of three microengines (ME):
3853  * PFP - Pre-Fetch Parser
3854  * ME - Micro Engine
3855  * CE - Constant Engine
3856  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3858  * used by the DE so that they can be loaded into cache in parallel
3859  * while the DE is processing state update packets.
3860  *
3861  * Compute
3862  * The compute CP consists of two microengines (ME):
3863  * MEC1 - Compute MicroEngine 1
3864  * MEC2 - Compute MicroEngine 2
3865  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3866  * The queues are exposed to userspace and are programmed directly
3867  * by the compute runtime.
3868  */
3869 /**
3870  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3871  *
3872  * @rdev: radeon_device pointer
3873  * @enable: enable or disable the MEs
3874  *
3875  * Halts or unhalts the gfx MEs.
3876  */
3877 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3878 {
3879 	if (enable)
3880 		WREG32(CP_ME_CNTL, 0);
3881 	else {
3882 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3883 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3884 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3885 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3886 	}
3887 	udelay(50);
3888 }
3889 
3890 /**
3891  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3892  *
3893  * @rdev: radeon_device pointer
3894  *
3895  * Loads the gfx PFP, ME, and CE ucode.
3896  * Returns 0 for success, -EINVAL if the ucode is not available.
3897  */
3898 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3899 {
3900 	int i;
3901 
3902 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3903 		return -EINVAL;
3904 
3905 	cik_cp_gfx_enable(rdev, false);
3906 
3907 	if (rdev->new_fw) {
3908 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3909 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3910 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3911 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3912 		const struct gfx_firmware_header_v1_0 *me_hdr =
3913 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3914 		const __le32 *fw_data;
3915 		u32 fw_size;
3916 
3917 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3918 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3919 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3920 
3921 		/* PFP */
3922 		fw_data = (const __le32 *)
3923 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3924 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3925 		WREG32(CP_PFP_UCODE_ADDR, 0);
3926 		for (i = 0; i < fw_size; i++)
3927 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3928 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3929 
3930 		/* CE */
3931 		fw_data = (const __le32 *)
3932 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3933 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3934 		WREG32(CP_CE_UCODE_ADDR, 0);
3935 		for (i = 0; i < fw_size; i++)
3936 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3937 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3938 
3939 		/* ME */
3940 		fw_data = (const __be32 *)
3941 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3942 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3943 		WREG32(CP_ME_RAM_WADDR, 0);
3944 		for (i = 0; i < fw_size; i++)
3945 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3946 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3947 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3948 	} else {
3949 		const __be32 *fw_data;
3950 
3951 		/* PFP */
3952 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3953 		WREG32(CP_PFP_UCODE_ADDR, 0);
3954 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3955 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3956 		WREG32(CP_PFP_UCODE_ADDR, 0);
3957 
3958 		/* CE */
3959 		fw_data = (const __be32 *)rdev->ce_fw->data;
3960 		WREG32(CP_CE_UCODE_ADDR, 0);
3961 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3962 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3963 		WREG32(CP_CE_UCODE_ADDR, 0);
3964 
3965 		/* ME */
3966 		fw_data = (const __be32 *)rdev->me_fw->data;
3967 		WREG32(CP_ME_RAM_WADDR, 0);
3968 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3969 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3970 		WREG32(CP_ME_RAM_WADDR, 0);
3971 	}
3972 
3973 	return 0;
3974 }
3975 
3976 /**
3977  * cik_cp_gfx_start - start the gfx ring
3978  *
3979  * @rdev: radeon_device pointer
3980  *
3981  * Enables the ring and loads the clear state context and other
3982  * packets required to init the ring.
3983  * Returns 0 for success, error for failure.
3984  */
3985 static int cik_cp_gfx_start(struct radeon_device *rdev)
3986 {
3987 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3988 	int r, i;
3989 
3990 	/* init the CP */
3991 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3992 	WREG32(CP_ENDIAN_SWAP, 0);
3993 	WREG32(CP_DEVICE_ID, 1);
3994 
3995 	cik_cp_gfx_enable(rdev, true);
3996 
3997 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3998 	if (r) {
3999 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4000 		return r;
4001 	}
4002 
4003 	/* init the CE partitions.  CE only used for gfx on CIK */
4004 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4005 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4006 	radeon_ring_write(ring, 0x8000);
4007 	radeon_ring_write(ring, 0x8000);
4008 
4009 	/* setup clear context state */
4010 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4011 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4012 
4013 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4014 	radeon_ring_write(ring, 0x80000000);
4015 	radeon_ring_write(ring, 0x80000000);
4016 
4017 	for (i = 0; i < cik_default_size; i++)
4018 		radeon_ring_write(ring, cik_default_state[i]);
4019 
4020 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4021 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4022 
4023 	/* set clear context state */
4024 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4025 	radeon_ring_write(ring, 0);
4026 
4027 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4028 	radeon_ring_write(ring, 0x00000316);
4029 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4030 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4031 
4032 	radeon_ring_unlock_commit(rdev, ring, false);
4033 
4034 	return 0;
4035 }
4036 
4037 /**
4038  * cik_cp_gfx_fini - stop the gfx ring
4039  *
4040  * @rdev: radeon_device pointer
4041  *
4042  * Stop the gfx ring and tear down the driver ring
4043  * info.
4044  */
4045 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4046 {
4047 	cik_cp_gfx_enable(rdev, false);
4048 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4049 }
4050 
4051 /**
4052  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4053  *
4054  * @rdev: radeon_device pointer
4055  *
4056  * Program the location and size of the gfx ring buffer
4057  * and test it to make sure it's working.
4058  * Returns 0 for success, error for failure.
4059  */
4060 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4061 {
4062 	struct radeon_ring *ring;
4063 	u32 tmp;
4064 	u32 rb_bufsz;
4065 	u64 rb_addr;
4066 	int r;
4067 
4068 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4069 	if (rdev->family != CHIP_HAWAII)
4070 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4071 
4072 	/* Set the write pointer delay */
4073 	WREG32(CP_RB_WPTR_DELAY, 0);
4074 
4075 	/* set the RB to use vmid 0 */
4076 	WREG32(CP_RB_VMID, 0);
4077 
4078 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4079 
4080 	/* ring 0 - compute and gfx */
4081 	/* Set ring buffer size */
4082 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4083 	rb_bufsz = order_base_2(ring->ring_size / 8);
4084 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4085 #ifdef __BIG_ENDIAN
4086 	tmp |= BUF_SWAP_32BIT;
4087 #endif
4088 	WREG32(CP_RB0_CNTL, tmp);
4089 
4090 	/* Initialize the ring buffer's read and write pointers */
4091 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4092 	ring->wptr = 0;
4093 	WREG32(CP_RB0_WPTR, ring->wptr);
4094 
4095 	/* set the wb address wether it's enabled or not */
4096 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4097 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4098 
4099 	/* scratch register shadowing is no longer supported */
4100 	WREG32(SCRATCH_UMSK, 0);
4101 
4102 	if (!rdev->wb.enabled)
4103 		tmp |= RB_NO_UPDATE;
4104 
4105 	mdelay(1);
4106 	WREG32(CP_RB0_CNTL, tmp);
4107 
4108 	rb_addr = ring->gpu_addr >> 8;
4109 	WREG32(CP_RB0_BASE, rb_addr);
4110 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4111 
4112 	/* start the ring */
4113 	cik_cp_gfx_start(rdev);
4114 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4115 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4116 	if (r) {
4117 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4118 		return r;
4119 	}
4120 
4121 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4122 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4123 
4124 	return 0;
4125 }
4126 
4127 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4128 		     struct radeon_ring *ring)
4129 {
4130 	u32 rptr;
4131 
4132 	if (rdev->wb.enabled)
4133 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4134 	else
4135 		rptr = RREG32(CP_RB0_RPTR);
4136 
4137 	return rptr;
4138 }
4139 
4140 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4141 		     struct radeon_ring *ring)
4142 {
4143 	return RREG32(CP_RB0_WPTR);
4144 }
4145 
4146 void cik_gfx_set_wptr(struct radeon_device *rdev,
4147 		      struct radeon_ring *ring)
4148 {
4149 	WREG32(CP_RB0_WPTR, ring->wptr);
4150 	(void)RREG32(CP_RB0_WPTR);
4151 }
4152 
4153 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4154 			 struct radeon_ring *ring)
4155 {
4156 	u32 rptr;
4157 
4158 	if (rdev->wb.enabled) {
4159 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4160 	} else {
4161 		mutex_lock(&rdev->srbm_mutex);
4162 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4163 		rptr = RREG32(CP_HQD_PQ_RPTR);
4164 		cik_srbm_select(rdev, 0, 0, 0, 0);
4165 		mutex_unlock(&rdev->srbm_mutex);
4166 	}
4167 
4168 	return rptr;
4169 }
4170 
4171 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4172 			 struct radeon_ring *ring)
4173 {
4174 	u32 wptr;
4175 
4176 	if (rdev->wb.enabled) {
4177 		/* XXX check if swapping is necessary on BE */
4178 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4179 	} else {
4180 		mutex_lock(&rdev->srbm_mutex);
4181 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4182 		wptr = RREG32(CP_HQD_PQ_WPTR);
4183 		cik_srbm_select(rdev, 0, 0, 0, 0);
4184 		mutex_unlock(&rdev->srbm_mutex);
4185 	}
4186 
4187 	return wptr;
4188 }
4189 
4190 void cik_compute_set_wptr(struct radeon_device *rdev,
4191 			  struct radeon_ring *ring)
4192 {
4193 	/* XXX check if swapping is necessary on BE */
4194 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4195 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4196 }
4197 
4198 static void cik_compute_stop(struct radeon_device *rdev,
4199 			     struct radeon_ring *ring)
4200 {
4201 	u32 j, tmp;
4202 
4203 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4204 	/* Disable wptr polling. */
4205 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4206 	tmp &= ~WPTR_POLL_EN;
4207 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4208 	/* Disable HQD. */
4209 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4210 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4211 		for (j = 0; j < rdev->usec_timeout; j++) {
4212 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4213 				break;
4214 			udelay(1);
4215 		}
4216 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4217 		WREG32(CP_HQD_PQ_RPTR, 0);
4218 		WREG32(CP_HQD_PQ_WPTR, 0);
4219 	}
4220 	cik_srbm_select(rdev, 0, 0, 0, 0);
4221 }
4222 
4223 /**
4224  * cik_cp_compute_enable - enable/disable the compute CP MEs
4225  *
4226  * @rdev: radeon_device pointer
4227  * @enable: enable or disable the MEs
4228  *
4229  * Halts or unhalts the compute MEs.
4230  */
4231 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4232 {
4233 	if (enable)
4234 		WREG32(CP_MEC_CNTL, 0);
4235 	else {
4236 		/*
4237 		 * To make hibernation reliable we need to clear compute ring
4238 		 * configuration before halting the compute ring.
4239 		 */
4240 		mutex_lock(&rdev->srbm_mutex);
4241 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4242 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4243 		mutex_unlock(&rdev->srbm_mutex);
4244 
4245 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4246 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4247 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4248 	}
4249 	udelay(50);
4250 }
4251 
4252 /**
4253  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4254  *
4255  * @rdev: radeon_device pointer
4256  *
4257  * Loads the compute MEC1&2 ucode.
4258  * Returns 0 for success, -EINVAL if the ucode is not available.
4259  */
4260 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4261 {
4262 	int i;
4263 
4264 	if (!rdev->mec_fw)
4265 		return -EINVAL;
4266 
4267 	cik_cp_compute_enable(rdev, false);
4268 
4269 	if (rdev->new_fw) {
4270 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4271 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4272 		const __le32 *fw_data;
4273 		u32 fw_size;
4274 
4275 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4276 
4277 		/* MEC1 */
4278 		fw_data = (const __le32 *)
4279 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4280 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4281 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4282 		for (i = 0; i < fw_size; i++)
4283 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4284 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4285 
4286 		/* MEC2 */
4287 		if (rdev->family == CHIP_KAVERI) {
4288 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4289 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4290 
4291 			fw_data = (const __le32 *)
4292 				(rdev->mec2_fw->data +
4293 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4294 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4295 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4296 			for (i = 0; i < fw_size; i++)
4297 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4298 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4299 		}
4300 	} else {
4301 		const __be32 *fw_data;
4302 
4303 		/* MEC1 */
4304 		fw_data = (const __be32 *)rdev->mec_fw->data;
4305 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4306 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4307 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4308 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4309 
4310 		if (rdev->family == CHIP_KAVERI) {
4311 			/* MEC2 */
4312 			fw_data = (const __be32 *)rdev->mec_fw->data;
4313 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4314 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4315 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4316 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4317 		}
4318 	}
4319 
4320 	return 0;
4321 }
4322 
4323 /**
4324  * cik_cp_compute_start - start the compute queues
4325  *
4326  * @rdev: radeon_device pointer
4327  *
4328  * Enable the compute queues.
4329  * Returns 0 for success, error for failure.
4330  */
4331 static int cik_cp_compute_start(struct radeon_device *rdev)
4332 {
4333 	cik_cp_compute_enable(rdev, true);
4334 
4335 	return 0;
4336 }
4337 
4338 /**
4339  * cik_cp_compute_fini - stop the compute queues
4340  *
4341  * @rdev: radeon_device pointer
4342  *
4343  * Stop the compute queues and tear down the driver queue
4344  * info.
4345  */
4346 static void cik_cp_compute_fini(struct radeon_device *rdev)
4347 {
4348 	int i, idx, r;
4349 
4350 	cik_cp_compute_enable(rdev, false);
4351 
4352 	for (i = 0; i < 2; i++) {
4353 		if (i == 0)
4354 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4355 		else
4356 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4357 
4358 		if (rdev->ring[idx].mqd_obj) {
4359 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4360 			if (unlikely(r != 0))
4361 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4362 
4363 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4364 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4365 
4366 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4367 			rdev->ring[idx].mqd_obj = NULL;
4368 		}
4369 	}
4370 }
4371 
4372 static void cik_mec_fini(struct radeon_device *rdev)
4373 {
4374 	int r;
4375 
4376 	if (rdev->mec.hpd_eop_obj) {
4377 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4378 		if (unlikely(r != 0))
4379 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4380 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4381 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4382 
4383 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4384 		rdev->mec.hpd_eop_obj = NULL;
4385 	}
4386 }
4387 
4388 #define MEC_HPD_SIZE 2048
4389 
4390 static int cik_mec_init(struct radeon_device *rdev)
4391 {
4392 	int r;
4393 	u32 *hpd;
4394 
4395 	/*
4396 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4397 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4398 	 */
4399 	if (rdev->family == CHIP_KAVERI)
4400 		rdev->mec.num_mec = 2;
4401 	else
4402 		rdev->mec.num_mec = 1;
4403 	rdev->mec.num_pipe = 4;
4404 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4405 
4406 	if (rdev->mec.hpd_eop_obj == NULL) {
4407 		r = radeon_bo_create(rdev,
4408 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4409 				     PAGE_SIZE, true,
4410 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4411 				     &rdev->mec.hpd_eop_obj);
4412 		if (r) {
4413 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4414 			return r;
4415 		}
4416 	}
4417 
4418 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4419 	if (unlikely(r != 0)) {
4420 		cik_mec_fini(rdev);
4421 		return r;
4422 	}
4423 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4424 			  &rdev->mec.hpd_eop_gpu_addr);
4425 	if (r) {
4426 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4427 		cik_mec_fini(rdev);
4428 		return r;
4429 	}
4430 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4431 	if (r) {
4432 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4433 		cik_mec_fini(rdev);
4434 		return r;
4435 	}
4436 
4437 	/* clear memory.  Not sure if this is required or not */
4438 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4439 
4440 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4441 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4442 
4443 	return 0;
4444 }
4445 
4446 struct hqd_registers
4447 {
4448 	u32 cp_mqd_base_addr;
4449 	u32 cp_mqd_base_addr_hi;
4450 	u32 cp_hqd_active;
4451 	u32 cp_hqd_vmid;
4452 	u32 cp_hqd_persistent_state;
4453 	u32 cp_hqd_pipe_priority;
4454 	u32 cp_hqd_queue_priority;
4455 	u32 cp_hqd_quantum;
4456 	u32 cp_hqd_pq_base;
4457 	u32 cp_hqd_pq_base_hi;
4458 	u32 cp_hqd_pq_rptr;
4459 	u32 cp_hqd_pq_rptr_report_addr;
4460 	u32 cp_hqd_pq_rptr_report_addr_hi;
4461 	u32 cp_hqd_pq_wptr_poll_addr;
4462 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4463 	u32 cp_hqd_pq_doorbell_control;
4464 	u32 cp_hqd_pq_wptr;
4465 	u32 cp_hqd_pq_control;
4466 	u32 cp_hqd_ib_base_addr;
4467 	u32 cp_hqd_ib_base_addr_hi;
4468 	u32 cp_hqd_ib_rptr;
4469 	u32 cp_hqd_ib_control;
4470 	u32 cp_hqd_iq_timer;
4471 	u32 cp_hqd_iq_rptr;
4472 	u32 cp_hqd_dequeue_request;
4473 	u32 cp_hqd_dma_offload;
4474 	u32 cp_hqd_sema_cmd;
4475 	u32 cp_hqd_msg_type;
4476 	u32 cp_hqd_atomic0_preop_lo;
4477 	u32 cp_hqd_atomic0_preop_hi;
4478 	u32 cp_hqd_atomic1_preop_lo;
4479 	u32 cp_hqd_atomic1_preop_hi;
4480 	u32 cp_hqd_hq_scheduler0;
4481 	u32 cp_hqd_hq_scheduler1;
4482 	u32 cp_mqd_control;
4483 };
4484 
4485 struct bonaire_mqd
4486 {
4487 	u32 header;
4488 	u32 dispatch_initiator;
4489 	u32 dimensions[3];
4490 	u32 start_idx[3];
4491 	u32 num_threads[3];
4492 	u32 pipeline_stat_enable;
4493 	u32 perf_counter_enable;
4494 	u32 pgm[2];
4495 	u32 tba[2];
4496 	u32 tma[2];
4497 	u32 pgm_rsrc[2];
4498 	u32 vmid;
4499 	u32 resource_limits;
4500 	u32 static_thread_mgmt01[2];
4501 	u32 tmp_ring_size;
4502 	u32 static_thread_mgmt23[2];
4503 	u32 restart[3];
4504 	u32 thread_trace_enable;
4505 	u32 reserved1;
4506 	u32 user_data[16];
4507 	u32 vgtcs_invoke_count[2];
4508 	struct hqd_registers queue_state;
4509 	u32 dequeue_cntr;
4510 	u32 interrupt_queue[64];
4511 };
4512 
4513 /**
4514  * cik_cp_compute_resume - setup the compute queue registers
4515  *
4516  * @rdev: radeon_device pointer
4517  *
4518  * Program the compute queues and test them to make sure they
4519  * are working.
4520  * Returns 0 for success, error for failure.
4521  */
4522 static int cik_cp_compute_resume(struct radeon_device *rdev)
4523 {
4524 	int r, i, j, idx;
4525 	u32 tmp;
4526 	bool use_doorbell = true;
4527 	u64 hqd_gpu_addr;
4528 	u64 mqd_gpu_addr;
4529 	u64 eop_gpu_addr;
4530 	u64 wb_gpu_addr;
4531 	u32 *buf;
4532 	struct bonaire_mqd *mqd;
4533 
4534 	r = cik_cp_compute_start(rdev);
4535 	if (r)
4536 		return r;
4537 
4538 	/* fix up chicken bits */
4539 	tmp = RREG32(CP_CPF_DEBUG);
4540 	tmp |= (1 << 23);
4541 	WREG32(CP_CPF_DEBUG, tmp);
4542 
4543 	/* init the pipes */
4544 	mutex_lock(&rdev->srbm_mutex);
4545 
4546 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4547 		int me = (i < 4) ? 1 : 2;
4548 		int pipe = (i < 4) ? i : (i - 4);
4549 
4550 		cik_srbm_select(rdev, me, pipe, 0, 0);
4551 
4552 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4553 		/* write the EOP addr */
4554 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4555 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4556 
4557 		/* set the VMID assigned */
4558 		WREG32(CP_HPD_EOP_VMID, 0);
4559 
4560 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4561 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4562 		tmp &= ~EOP_SIZE_MASK;
4563 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4564 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4565 
4566 	}
4567 	cik_srbm_select(rdev, 0, 0, 0, 0);
4568 	mutex_unlock(&rdev->srbm_mutex);
4569 
4570 	/* init the queues.  Just two for now. */
4571 	for (i = 0; i < 2; i++) {
4572 		if (i == 0)
4573 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4574 		else
4575 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4576 
4577 		if (rdev->ring[idx].mqd_obj == NULL) {
4578 			r = radeon_bo_create(rdev,
4579 					     sizeof(struct bonaire_mqd),
4580 					     PAGE_SIZE, true,
4581 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4582 					     NULL, &rdev->ring[idx].mqd_obj);
4583 			if (r) {
4584 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4585 				return r;
4586 			}
4587 		}
4588 
4589 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4590 		if (unlikely(r != 0)) {
4591 			cik_cp_compute_fini(rdev);
4592 			return r;
4593 		}
4594 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4595 				  &mqd_gpu_addr);
4596 		if (r) {
4597 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4598 			cik_cp_compute_fini(rdev);
4599 			return r;
4600 		}
4601 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4602 		if (r) {
4603 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4604 			cik_cp_compute_fini(rdev);
4605 			return r;
4606 		}
4607 
4608 		/* init the mqd struct */
4609 		memset(buf, 0, sizeof(struct bonaire_mqd));
4610 
4611 		mqd = (struct bonaire_mqd *)buf;
4612 		mqd->header = 0xC0310800;
4613 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4614 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4615 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4616 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4617 
4618 		mutex_lock(&rdev->srbm_mutex);
4619 		cik_srbm_select(rdev, rdev->ring[idx].me,
4620 				rdev->ring[idx].pipe,
4621 				rdev->ring[idx].queue, 0);
4622 
4623 		/* disable wptr polling */
4624 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4625 		tmp &= ~WPTR_POLL_EN;
4626 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4627 
4628 		/* enable doorbell? */
4629 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4630 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4631 		if (use_doorbell)
4632 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4633 		else
4634 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4635 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4636 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4637 
4638 		/* disable the queue if it's active */
4639 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4640 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4641 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4642 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4643 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4644 			for (j = 0; j < rdev->usec_timeout; j++) {
4645 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4646 					break;
4647 				udelay(1);
4648 			}
4649 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4650 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4651 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4652 		}
4653 
4654 		/* set the pointer to the MQD */
4655 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4656 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4657 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4658 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4659 		/* set MQD vmid to 0 */
4660 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4661 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4662 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4663 
4664 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4665 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4666 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4667 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4668 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4669 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4670 
4671 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4672 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4673 		mqd->queue_state.cp_hqd_pq_control &=
4674 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4675 
4676 		mqd->queue_state.cp_hqd_pq_control |=
4677 			order_base_2(rdev->ring[idx].ring_size / 8);
4678 		mqd->queue_state.cp_hqd_pq_control |=
4679 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4680 #ifdef __BIG_ENDIAN
4681 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4682 #endif
4683 		mqd->queue_state.cp_hqd_pq_control &=
4684 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4685 		mqd->queue_state.cp_hqd_pq_control |=
4686 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4687 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4688 
4689 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4690 		if (i == 0)
4691 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4692 		else
4693 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4694 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4695 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4696 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4697 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4698 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4699 
4700 		/* set the wb address wether it's enabled or not */
4701 		if (i == 0)
4702 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4703 		else
4704 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4705 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4706 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4707 			upper_32_bits(wb_gpu_addr) & 0xffff;
4708 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4709 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4710 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4711 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4712 
4713 		/* enable the doorbell if requested */
4714 		if (use_doorbell) {
4715 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4716 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4717 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4718 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4719 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4720 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4721 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4722 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4723 
4724 		} else {
4725 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4726 		}
4727 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4728 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4729 
4730 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4731 		rdev->ring[idx].wptr = 0;
4732 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4733 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4734 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4735 
4736 		/* set the vmid for the queue */
4737 		mqd->queue_state.cp_hqd_vmid = 0;
4738 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4739 
4740 		/* activate the queue */
4741 		mqd->queue_state.cp_hqd_active = 1;
4742 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4743 
4744 		cik_srbm_select(rdev, 0, 0, 0, 0);
4745 		mutex_unlock(&rdev->srbm_mutex);
4746 
4747 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4748 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4749 
4750 		rdev->ring[idx].ready = true;
4751 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4752 		if (r)
4753 			rdev->ring[idx].ready = false;
4754 	}
4755 
4756 	return 0;
4757 }
4758 
4759 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4760 {
4761 	cik_cp_gfx_enable(rdev, enable);
4762 	cik_cp_compute_enable(rdev, enable);
4763 }
4764 
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx microcode first; the compute microcode is only loaded if
 * that succeeded.
 * Returns 0 for success, the first error encountered otherwise.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r = cik_cp_gfx_load_microcode(rdev);

	if (!r)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4778 
/**
 * cik_cp_fini - tear down the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Calls the gfx teardown helper followed by the compute teardown helper.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4784 
4785 static int cik_cp_resume(struct radeon_device *rdev)
4786 {
4787 	int r;
4788 
4789 	cik_enable_gui_idle_interrupt(rdev, false);
4790 
4791 	r = cik_cp_load_microcode(rdev);
4792 	if (r)
4793 		return r;
4794 
4795 	r = cik_cp_gfx_resume(rdev);
4796 	if (r)
4797 		return r;
4798 	r = cik_cp_compute_resume(rdev);
4799 	if (r)
4800 		return r;
4801 
4802 	cik_enable_gui_idle_interrupt(rdev, true);
4803 
4804 	return 0;
4805 }
4806 
4807 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4808 {
4809 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4810 		RREG32(GRBM_STATUS));
4811 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4812 		RREG32(GRBM_STATUS2));
4813 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4814 		RREG32(GRBM_STATUS_SE0));
4815 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4816 		RREG32(GRBM_STATUS_SE1));
4817 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4818 		RREG32(GRBM_STATUS_SE2));
4819 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4820 		RREG32(GRBM_STATUS_SE3));
4821 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4822 		RREG32(SRBM_STATUS));
4823 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4824 		RREG32(SRBM_STATUS2));
4825 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4826 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4827 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4828 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4829 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4830 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4831 		 RREG32(CP_STALLED_STAT1));
4832 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4833 		 RREG32(CP_STALLED_STAT2));
4834 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4835 		 RREG32(CP_STALLED_STAT3));
4836 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4837 		 RREG32(CP_CPF_BUSY_STAT));
4838 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4839 		 RREG32(CP_CPF_STALLED_STAT1));
4840 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4841 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4842 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4843 		 RREG32(CP_CPC_STALLED_STAT1));
4844 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4845 }
4846 
4847 /**
4848  * cik_gpu_check_soft_reset - check which blocks are busy
4849  *
4850  * @rdev: radeon_device pointer
4851  *
4852  * Check which blocks are busy and return the relevant reset
4853  * mask to be used by cik_gpu_soft_reset().
4854  * Returns a mask of the blocks to be reset.
4855  */
4856 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4857 {
4858 	u32 reset_mask = 0;
4859 	u32 tmp;
4860 
4861 	/* GRBM_STATUS */
4862 	tmp = RREG32(GRBM_STATUS);
4863 	if (tmp & (PA_BUSY | SC_BUSY |
4864 		   BCI_BUSY | SX_BUSY |
4865 		   TA_BUSY | VGT_BUSY |
4866 		   DB_BUSY | CB_BUSY |
4867 		   GDS_BUSY | SPI_BUSY |
4868 		   IA_BUSY | IA_BUSY_NO_DMA))
4869 		reset_mask |= RADEON_RESET_GFX;
4870 
4871 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4872 		reset_mask |= RADEON_RESET_CP;
4873 
4874 	/* GRBM_STATUS2 */
4875 	tmp = RREG32(GRBM_STATUS2);
4876 	if (tmp & RLC_BUSY)
4877 		reset_mask |= RADEON_RESET_RLC;
4878 
4879 	/* SDMA0_STATUS_REG */
4880 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4881 	if (!(tmp & SDMA_IDLE))
4882 		reset_mask |= RADEON_RESET_DMA;
4883 
4884 	/* SDMA1_STATUS_REG */
4885 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4886 	if (!(tmp & SDMA_IDLE))
4887 		reset_mask |= RADEON_RESET_DMA1;
4888 
4889 	/* SRBM_STATUS2 */
4890 	tmp = RREG32(SRBM_STATUS2);
4891 	if (tmp & SDMA_BUSY)
4892 		reset_mask |= RADEON_RESET_DMA;
4893 
4894 	if (tmp & SDMA1_BUSY)
4895 		reset_mask |= RADEON_RESET_DMA1;
4896 
4897 	/* SRBM_STATUS */
4898 	tmp = RREG32(SRBM_STATUS);
4899 
4900 	if (tmp & IH_BUSY)
4901 		reset_mask |= RADEON_RESET_IH;
4902 
4903 	if (tmp & SEM_BUSY)
4904 		reset_mask |= RADEON_RESET_SEM;
4905 
4906 	if (tmp & GRBM_RQ_PENDING)
4907 		reset_mask |= RADEON_RESET_GRBM;
4908 
4909 	if (tmp & VMC_BUSY)
4910 		reset_mask |= RADEON_RESET_VMC;
4911 
4912 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4913 		   MCC_BUSY | MCD_BUSY))
4914 		reset_mask |= RADEON_RESET_MC;
4915 
4916 	if (evergreen_is_display_hung(rdev))
4917 		reset_mask |= RADEON_RESET_DISPLAY;
4918 
4919 	/* Skip MC reset as it's mostly likely not hung, just busy */
4920 	if (reset_mask & RADEON_RESET_MC) {
4921 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4922 		reset_mask &= ~RADEON_RESET_MC;
4923 	}
4924 
4925 	return reset_mask;
4926 }
4927 
4928 /**
4929  * cik_gpu_soft_reset - soft reset GPU
4930  *
4931  * @rdev: radeon_device pointer
4932  * @reset_mask: mask of which blocks to reset
4933  *
4934  * Soft reset the blocks specified in @reset_mask.
4935  */
4936 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4937 {
4938 	struct evergreen_mc_save save;
4939 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4940 	u32 tmp;
4941 
4942 	if (reset_mask == 0)
4943 		return;
4944 
4945 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4946 
4947 	cik_print_gpu_status_regs(rdev);
4948 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4949 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4950 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4951 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4952 
4953 	/* disable CG/PG */
4954 	cik_fini_pg(rdev);
4955 	cik_fini_cg(rdev);
4956 
4957 	/* stop the rlc */
4958 	cik_rlc_stop(rdev);
4959 
4960 	/* Disable GFX parsing/prefetching */
4961 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4962 
4963 	/* Disable MEC parsing/prefetching */
4964 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4965 
4966 	if (reset_mask & RADEON_RESET_DMA) {
4967 		/* sdma0 */
4968 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4969 		tmp |= SDMA_HALT;
4970 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4971 	}
4972 	if (reset_mask & RADEON_RESET_DMA1) {
4973 		/* sdma1 */
4974 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4975 		tmp |= SDMA_HALT;
4976 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4977 	}
4978 
4979 	evergreen_mc_stop(rdev, &save);
4980 	if (evergreen_mc_wait_for_idle(rdev)) {
4981 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4982 	}
4983 
4984 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4985 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4986 
4987 	if (reset_mask & RADEON_RESET_CP) {
4988 		grbm_soft_reset |= SOFT_RESET_CP;
4989 
4990 		srbm_soft_reset |= SOFT_RESET_GRBM;
4991 	}
4992 
4993 	if (reset_mask & RADEON_RESET_DMA)
4994 		srbm_soft_reset |= SOFT_RESET_SDMA;
4995 
4996 	if (reset_mask & RADEON_RESET_DMA1)
4997 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4998 
4999 	if (reset_mask & RADEON_RESET_DISPLAY)
5000 		srbm_soft_reset |= SOFT_RESET_DC;
5001 
5002 	if (reset_mask & RADEON_RESET_RLC)
5003 		grbm_soft_reset |= SOFT_RESET_RLC;
5004 
5005 	if (reset_mask & RADEON_RESET_SEM)
5006 		srbm_soft_reset |= SOFT_RESET_SEM;
5007 
5008 	if (reset_mask & RADEON_RESET_IH)
5009 		srbm_soft_reset |= SOFT_RESET_IH;
5010 
5011 	if (reset_mask & RADEON_RESET_GRBM)
5012 		srbm_soft_reset |= SOFT_RESET_GRBM;
5013 
5014 	if (reset_mask & RADEON_RESET_VMC)
5015 		srbm_soft_reset |= SOFT_RESET_VMC;
5016 
5017 	if (!(rdev->flags & RADEON_IS_IGP)) {
5018 		if (reset_mask & RADEON_RESET_MC)
5019 			srbm_soft_reset |= SOFT_RESET_MC;
5020 	}
5021 
5022 	if (grbm_soft_reset) {
5023 		tmp = RREG32(GRBM_SOFT_RESET);
5024 		tmp |= grbm_soft_reset;
5025 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5026 		WREG32(GRBM_SOFT_RESET, tmp);
5027 		tmp = RREG32(GRBM_SOFT_RESET);
5028 
5029 		udelay(50);
5030 
5031 		tmp &= ~grbm_soft_reset;
5032 		WREG32(GRBM_SOFT_RESET, tmp);
5033 		tmp = RREG32(GRBM_SOFT_RESET);
5034 	}
5035 
5036 	if (srbm_soft_reset) {
5037 		tmp = RREG32(SRBM_SOFT_RESET);
5038 		tmp |= srbm_soft_reset;
5039 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5040 		WREG32(SRBM_SOFT_RESET, tmp);
5041 		tmp = RREG32(SRBM_SOFT_RESET);
5042 
5043 		udelay(50);
5044 
5045 		tmp &= ~srbm_soft_reset;
5046 		WREG32(SRBM_SOFT_RESET, tmp);
5047 		tmp = RREG32(SRBM_SOFT_RESET);
5048 	}
5049 
5050 	/* Wait a little for things to settle down */
5051 	udelay(50);
5052 
5053 	evergreen_mc_resume(rdev, &save);
5054 	udelay(50);
5055 
5056 	cik_print_gpu_status_regs(rdev);
5057 }
5058 
5059 struct kv_reset_save_regs {
5060 	u32 gmcon_reng_execute;
5061 	u32 gmcon_misc;
5062 	u32 gmcon_misc3;
5063 };
5064 
5065 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5066 				   struct kv_reset_save_regs *save)
5067 {
5068 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5069 	save->gmcon_misc = RREG32(GMCON_MISC);
5070 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5071 
5072 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5073 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5074 						STCTRL_STUTTER_EN));
5075 }
5076 
5077 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5078 				      struct kv_reset_save_regs *save)
5079 {
5080 	int i;
5081 
5082 	WREG32(GMCON_PGFSM_WRITE, 0);
5083 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5084 
5085 	for (i = 0; i < 5; i++)
5086 		WREG32(GMCON_PGFSM_WRITE, 0);
5087 
5088 	WREG32(GMCON_PGFSM_WRITE, 0);
5089 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5090 
5091 	for (i = 0; i < 5; i++)
5092 		WREG32(GMCON_PGFSM_WRITE, 0);
5093 
5094 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5095 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5096 
5097 	for (i = 0; i < 5; i++)
5098 		WREG32(GMCON_PGFSM_WRITE, 0);
5099 
5100 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5101 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5102 
5103 	for (i = 0; i < 5; i++)
5104 		WREG32(GMCON_PGFSM_WRITE, 0);
5105 
5106 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5107 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5108 
5109 	for (i = 0; i < 5; i++)
5110 		WREG32(GMCON_PGFSM_WRITE, 0);
5111 
5112 	WREG32(GMCON_PGFSM_WRITE, 0);
5113 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5114 
5115 	for (i = 0; i < 5; i++)
5116 		WREG32(GMCON_PGFSM_WRITE, 0);
5117 
5118 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5119 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5120 
5121 	for (i = 0; i < 5; i++)
5122 		WREG32(GMCON_PGFSM_WRITE, 0);
5123 
5124 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5125 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5126 
5127 	for (i = 0; i < 5; i++)
5128 		WREG32(GMCON_PGFSM_WRITE, 0);
5129 
5130 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5131 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5132 
5133 	for (i = 0; i < 5; i++)
5134 		WREG32(GMCON_PGFSM_WRITE, 0);
5135 
5136 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5137 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5138 
5139 	for (i = 0; i < 5; i++)
5140 		WREG32(GMCON_PGFSM_WRITE, 0);
5141 
5142 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5143 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5144 
5145 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5146 	WREG32(GMCON_MISC, save->gmcon_misc);
5147 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5148 }
5149 
5150 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5151 {
5152 	struct evergreen_mc_save save;
5153 	struct kv_reset_save_regs kv_save = { 0 };
5154 	u32 tmp, i;
5155 
5156 	dev_info(rdev->dev, "GPU pci config reset\n");
5157 
5158 	/* disable dpm? */
5159 
5160 	/* disable cg/pg */
5161 	cik_fini_pg(rdev);
5162 	cik_fini_cg(rdev);
5163 
5164 	/* Disable GFX parsing/prefetching */
5165 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5166 
5167 	/* Disable MEC parsing/prefetching */
5168 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5169 
5170 	/* sdma0 */
5171 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5172 	tmp |= SDMA_HALT;
5173 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5174 	/* sdma1 */
5175 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5176 	tmp |= SDMA_HALT;
5177 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5178 	/* XXX other engines? */
5179 
5180 	/* halt the rlc, disable cp internal ints */
5181 	cik_rlc_stop(rdev);
5182 
5183 	udelay(50);
5184 
5185 	/* disable mem access */
5186 	evergreen_mc_stop(rdev, &save);
5187 	if (evergreen_mc_wait_for_idle(rdev)) {
5188 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5189 	}
5190 
5191 	if (rdev->flags & RADEON_IS_IGP)
5192 		kv_save_regs_for_reset(rdev, &kv_save);
5193 
5194 	/* disable BM */
5195 	pci_clear_master(rdev->pdev);
5196 	/* reset */
5197 	radeon_pci_config_reset(rdev);
5198 
5199 	udelay(100);
5200 
5201 	/* wait for asic to come out of reset */
5202 	for (i = 0; i < rdev->usec_timeout; i++) {
5203 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5204 			break;
5205 		udelay(1);
5206 	}
5207 
5208 	/* does asic init need to be run first??? */
5209 	if (rdev->flags & RADEON_IS_IGP)
5210 		kv_restore_regs_for_reset(rdev, &kv_save);
5211 }
5212 
5213 /**
5214  * cik_asic_reset - soft reset GPU
5215  *
5216  * @rdev: radeon_device pointer
5217  * @hard: force hard reset
5218  *
5219  * Look up which blocks are hung and attempt
5220  * to reset them.
5221  * Returns 0 for success.
5222  */
5223 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5224 {
5225 	u32 reset_mask;
5226 
5227 	if (hard) {
5228 		cik_gpu_pci_config_reset(rdev);
5229 		return 0;
5230 	}
5231 
5232 	reset_mask = cik_gpu_check_soft_reset(rdev);
5233 
5234 	if (reset_mask)
5235 		r600_set_bios_scratch_engine_hung(rdev, true);
5236 
5237 	/* try soft reset */
5238 	cik_gpu_soft_reset(rdev, reset_mask);
5239 
5240 	reset_mask = cik_gpu_check_soft_reset(rdev);
5241 
5242 	/* try pci config reset */
5243 	if (reset_mask && radeon_hard_reset)
5244 		cik_gpu_pci_config_reset(rdev);
5245 
5246 	reset_mask = cik_gpu_check_soft_reset(rdev);
5247 
5248 	if (!reset_mask)
5249 		r600_set_bios_scratch_engine_hung(rdev, false);
5250 
5251 	return 0;
5252 }
5253 
5254 /**
5255  * cik_gfx_is_lockup - check if the 3D engine is locked up
5256  *
5257  * @rdev: radeon_device pointer
5258  * @ring: radeon_ring structure holding ring information
5259  *
5260  * Check if the 3D engine is locked up (CIK).
5261  * Returns true if the engine is locked, false if not.
5262  */
5263 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5264 {
5265 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5266 
5267 	if (!(reset_mask & (RADEON_RESET_GFX |
5268 			    RADEON_RESET_COMPUTE |
5269 			    RADEON_RESET_CP))) {
5270 		radeon_ring_lockup_update(rdev, ring);
5271 		return false;
5272 	}
5273 	return radeon_ring_test_lockup(rdev, ring);
5274 }
5275 
5276 /* MC */
5277 /**
5278  * cik_mc_program - program the GPU memory controller
5279  *
5280  * @rdev: radeon_device pointer
5281  *
5282  * Set the location of vram, gart, and AGP in the GPU's
5283  * physical address space (CIK).
5284  */
5285 static void cik_mc_program(struct radeon_device *rdev)
5286 {
5287 	struct evergreen_mc_save save;
5288 	u32 tmp;
5289 	int i, j;
5290 
5291 	/* Initialize HDP */
5292 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5293 		WREG32((0x2c14 + j), 0x00000000);
5294 		WREG32((0x2c18 + j), 0x00000000);
5295 		WREG32((0x2c1c + j), 0x00000000);
5296 		WREG32((0x2c20 + j), 0x00000000);
5297 		WREG32((0x2c24 + j), 0x00000000);
5298 	}
5299 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5300 
5301 	evergreen_mc_stop(rdev, &save);
5302 	if (radeon_mc_wait_for_idle(rdev)) {
5303 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5304 	}
5305 	/* Lockout access through VGA aperture*/
5306 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5307 	/* Update configuration */
5308 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5309 	       rdev->mc.vram_start >> 12);
5310 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5311 	       rdev->mc.vram_end >> 12);
5312 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5313 	       rdev->vram_scratch.gpu_addr >> 12);
5314 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5315 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5316 	WREG32(MC_VM_FB_LOCATION, tmp);
5317 	/* XXX double check these! */
5318 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5319 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5320 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5321 	WREG32(MC_VM_AGP_BASE, 0);
5322 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5323 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5324 	if (radeon_mc_wait_for_idle(rdev)) {
5325 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5326 	}
5327 	evergreen_mc_resume(rdev, &save);
5328 	/* we need to own VRAM, so turn off the VGA renderer here
5329 	 * to stop it overwriting our objects */
5330 	rv515_vga_render_disable(rdev);
5331 }
5332 
5333 /**
5334  * cik_mc_init - initialize the memory controller driver params
5335  *
5336  * @rdev: radeon_device pointer
5337  *
5338  * Look up the amount of vram, vram width, and decide how to place
5339  * vram and gart within the GPU's physical address space (CIK).
5340  * Returns 0 for success.
5341  */
5342 static int cik_mc_init(struct radeon_device *rdev)
5343 {
5344 	u32 tmp;
5345 	int chansize, numchan;
5346 
5347 	/* Get VRAM informations */
5348 	rdev->mc.vram_is_ddr = true;
5349 	tmp = RREG32(MC_ARB_RAMCFG);
5350 	if (tmp & CHANSIZE_MASK) {
5351 		chansize = 64;
5352 	} else {
5353 		chansize = 32;
5354 	}
5355 	tmp = RREG32(MC_SHARED_CHMAP);
5356 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5357 	case 0:
5358 	default:
5359 		numchan = 1;
5360 		break;
5361 	case 1:
5362 		numchan = 2;
5363 		break;
5364 	case 2:
5365 		numchan = 4;
5366 		break;
5367 	case 3:
5368 		numchan = 8;
5369 		break;
5370 	case 4:
5371 		numchan = 3;
5372 		break;
5373 	case 5:
5374 		numchan = 6;
5375 		break;
5376 	case 6:
5377 		numchan = 10;
5378 		break;
5379 	case 7:
5380 		numchan = 12;
5381 		break;
5382 	case 8:
5383 		numchan = 16;
5384 		break;
5385 	}
5386 	rdev->mc.vram_width = numchan * chansize;
5387 	/* Could aper size report 0 ? */
5388 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5389 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5390 	/* size in MB on si */
5391 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5392 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5393 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5394 	si_vram_gtt_location(rdev, &rdev->mc);
5395 	radeon_update_bandwidth_info(rdev);
5396 
5397 	return 0;
5398 }
5399 
5400 /*
5401  * GART
5402  * VMID 0 is the physical GPU addresses as used by the kernel.
5403  * VMIDs 1-15 are used for userspace clients and are handled
5404  * by the radeon vm/hsa code.
5405  */
5406 /**
5407  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5408  *
5409  * @rdev: radeon_device pointer
5410  *
5411  * Flush the TLB for the VMID 0 page table (CIK).
5412  */
5413 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5414 {
5415 	/* flush hdp cache */
5416 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5417 
5418 	/* bits 0-15 are the VM contexts0-15 */
5419 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5420 }
5421 
5422 /**
5423  * cik_pcie_gart_enable - gart enable
5424  *
5425  * @rdev: radeon_device pointer
5426  *
5427  * This sets up the TLBs, programs the page tables for VMID0,
5428  * sets up the hw for VMIDs 1-15 which are allocated on
5429  * demand, and sets up the global locations for the LDS, GDS,
5430  * and GPUVM for FSA64 clients (CIK).
5431  * Returns 0 for success, errors for failure.
5432  */
5433 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5434 {
5435 	int r, i;
5436 
5437 	if (rdev->gart.robj == NULL) {
5438 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5439 		return -EINVAL;
5440 	}
5441 	r = radeon_gart_table_vram_pin(rdev);
5442 	if (r)
5443 		return r;
5444 	/* Setup TLB control */
5445 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5446 	       (0xA << 7) |
5447 	       ENABLE_L1_TLB |
5448 	       ENABLE_L1_FRAGMENT_PROCESSING |
5449 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5450 	       ENABLE_ADVANCED_DRIVER_MODEL |
5451 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5452 	/* Setup L2 cache */
5453 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5454 	       ENABLE_L2_FRAGMENT_PROCESSING |
5455 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5456 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5457 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5458 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5459 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5460 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5461 	       BANK_SELECT(4) |
5462 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5463 	/* setup context0 */
5464 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5465 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5466 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5467 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5468 			(u32)(rdev->dummy_page.addr >> 12));
5469 	WREG32(VM_CONTEXT0_CNTL2, 0);
5470 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5471 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5472 
5473 	WREG32(0x15D4, 0);
5474 	WREG32(0x15D8, 0);
5475 	WREG32(0x15DC, 0);
5476 
5477 	/* restore context1-15 */
5478 	/* set vm size, must be a multiple of 4 */
5479 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5480 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5481 	for (i = 1; i < 16; i++) {
5482 		if (i < 8)
5483 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5484 			       rdev->vm_manager.saved_table_addr[i]);
5485 		else
5486 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5487 			       rdev->vm_manager.saved_table_addr[i]);
5488 	}
5489 
5490 	/* enable context1-15 */
5491 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5492 	       (u32)(rdev->dummy_page.addr >> 12));
5493 	WREG32(VM_CONTEXT1_CNTL2, 4);
5494 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5495 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5496 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5497 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5498 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5499 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5500 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5501 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5502 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5503 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5504 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5505 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5506 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5507 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5508 
5509 	if (rdev->family == CHIP_KAVERI) {
5510 		u32 tmp = RREG32(CHUB_CONTROL);
5511 		tmp &= ~BYPASS_VM;
5512 		WREG32(CHUB_CONTROL, tmp);
5513 	}
5514 
5515 	/* XXX SH_MEM regs */
5516 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5517 	mutex_lock(&rdev->srbm_mutex);
5518 	for (i = 0; i < 16; i++) {
5519 		cik_srbm_select(rdev, 0, 0, 0, i);
5520 		/* CP and shaders */
5521 		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5522 		WREG32(SH_MEM_APE1_BASE, 1);
5523 		WREG32(SH_MEM_APE1_LIMIT, 0);
5524 		WREG32(SH_MEM_BASES, 0);
5525 		/* SDMA GFX */
5526 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5527 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5528 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5529 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5530 		/* XXX SDMA RLC - todo */
5531 	}
5532 	cik_srbm_select(rdev, 0, 0, 0, 0);
5533 	mutex_unlock(&rdev->srbm_mutex);
5534 
5535 	cik_pcie_gart_tlb_flush(rdev);
5536 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5537 		 (unsigned)(rdev->mc.gtt_size >> 20),
5538 		 (unsigned long long)rdev->gart.table_addr);
5539 	rdev->gart.ready = true;
5540 	return 0;
5541 }
5542 
5543 /**
5544  * cik_pcie_gart_disable - gart disable
5545  *
5546  * @rdev: radeon_device pointer
5547  *
5548  * This disables all VM page table (CIK).
5549  */
5550 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5551 {
5552 	unsigned i;
5553 
5554 	for (i = 1; i < 16; ++i) {
5555 		uint32_t reg;
5556 		if (i < 8)
5557 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5558 		else
5559 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5560 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5561 	}
5562 
5563 	/* Disable all tables */
5564 	WREG32(VM_CONTEXT0_CNTL, 0);
5565 	WREG32(VM_CONTEXT1_CNTL, 0);
5566 	/* Setup TLB control */
5567 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5568 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5569 	/* Setup L2 cache */
5570 	WREG32(VM_L2_CNTL,
5571 	       ENABLE_L2_FRAGMENT_PROCESSING |
5572 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5573 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5574 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5575 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5576 	WREG32(VM_L2_CNTL2, 0);
5577 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5578 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5579 	radeon_gart_table_vram_unpin(rdev);
5580 }
5581 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): the GART is disabled
 * first, then its VRAM table is freed and the common GART state is
 * finalized.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* hardware off before any backing storage goes away */
	cik_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5595 
5596 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0; neither argument is inspected.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nothing to validate in software */
	return 0;
}
5609 
5610 /*
5611  * vm
5612  * VMID 0 is the physical GPU addresses as used by the kernel.
5613  * VMIDs 1-15 are used for userspace clients and are handled
5614  * by the radeon vm/hsa code.
5615  */
5616 /**
5617  * cik_vm_init - cik vm init callback
5618  *
5619  * @rdev: radeon_device pointer
5620  *
5621  * Inits cik specific vm parameters (number of VMs, base of vram for
5622  * VMIDs 1-15) (CIK).
5623  * Returns 0 for success.
5624  */
5625 int cik_vm_init(struct radeon_device *rdev)
5626 {
5627 	/*
5628 	 * number of VMs
5629 	 * VMID 0 is reserved for System
5630 	 * radeon graphics/compute will use VMIDs 1-15
5631 	 */
5632 	rdev->vm_manager.nvm = 16;
5633 	/* base offset of vram pages */
5634 	if (rdev->flags & RADEON_IS_IGP) {
5635 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5636 		tmp <<= 22;
5637 		rdev->vm_manager.vram_base_offset = tmp;
5638 	} else
5639 		rdev->vm_manager.vram_base_offset = 0;
5640 
5641 	return 0;
5642 }
5643 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Currently there is nothing to undo, so the body is empty.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty */
}
5654 
5655 /**
5656  * cik_vm_decode_fault - print human readable fault info
5657  *
5658  * @rdev: radeon_device pointer
5659  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5660  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5661  *
5662  * Print human readable fault information (CIK).
5663  */
5664 static void cik_vm_decode_fault(struct radeon_device *rdev,
5665 				u32 status, u32 addr, u32 mc_client)
5666 {
5667 	u32 mc_id;
5668 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5669 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5670 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5671 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5672 
5673 	if (rdev->family == CHIP_HAWAII)
5674 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5675 	else
5676 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5677 
5678 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5679 	       protections, vmid, addr,
5680 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5681 	       block, mc_client, mc_id);
5682 }
5683 
5684 /**
5685  * cik_vm_flush - cik vm flush using the CP
5686  *
5687  * @rdev: radeon_device pointer
5688  *
5689  * Update the page table base and flush the VM TLB
5690  * using the CP (CIK).
5691  */
5692 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5693 		  unsigned vm_id, uint64_t pd_addr)
5694 {
5695 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5696 
5697 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5698 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5699 				 WRITE_DATA_DST_SEL(0)));
5700 	if (vm_id < 8) {
5701 		radeon_ring_write(ring,
5702 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5703 	} else {
5704 		radeon_ring_write(ring,
5705 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5706 	}
5707 	radeon_ring_write(ring, 0);
5708 	radeon_ring_write(ring, pd_addr >> 12);
5709 
5710 	/* update SH_MEM_* regs */
5711 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5712 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5713 				 WRITE_DATA_DST_SEL(0)));
5714 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5715 	radeon_ring_write(ring, 0);
5716 	radeon_ring_write(ring, VMID(vm_id));
5717 
5718 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5719 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5720 				 WRITE_DATA_DST_SEL(0)));
5721 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5722 	radeon_ring_write(ring, 0);
5723 
5724 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5725 	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5726 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5727 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5728 
5729 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5730 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5731 				 WRITE_DATA_DST_SEL(0)));
5732 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5733 	radeon_ring_write(ring, 0);
5734 	radeon_ring_write(ring, VMID(0));
5735 
5736 	/* HDP flush */
5737 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5738 
5739 	/* bits 0-15 are the VM contexts0-15 */
5740 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5741 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5742 				 WRITE_DATA_DST_SEL(0)));
5743 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5744 	radeon_ring_write(ring, 0);
5745 	radeon_ring_write(ring, 1 << vm_id);
5746 
5747 	/* wait for the invalidate to complete */
5748 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5749 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5750 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5751 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5752 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5753 	radeon_ring_write(ring, 0);
5754 	radeon_ring_write(ring, 0); /* ref */
5755 	radeon_ring_write(ring, 0); /* mask */
5756 	radeon_ring_write(ring, 0x20); /* poll interval */
5757 
5758 	/* compute doesn't have PFP */
5759 	if (usepfp) {
5760 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5761 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5762 		radeon_ring_write(ring, 0x0);
5763 	}
5764 }
5765 
5766 /*
5767  * RLC
5768  * The RLC is a multi-purpose microengine that handles a
5769  * variety of functions, the most important of which is
5770  * the interrupt controller.
5771  */
5772 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5773 					  bool enable)
5774 {
5775 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5776 
5777 	if (enable)
5778 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5779 	else
5780 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5781 	WREG32(CP_INT_CNTL_RING0, tmp);
5782 }
5783 
5784 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5785 {
5786 	u32 tmp;
5787 
5788 	tmp = RREG32(RLC_LB_CNTL);
5789 	if (enable)
5790 		tmp |= LOAD_BALANCE_ENABLE;
5791 	else
5792 		tmp &= ~LOAD_BALANCE_ENABLE;
5793 	WREG32(RLC_LB_CNTL, tmp);
5794 }
5795 
5796 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5797 {
5798 	u32 i, j, k;
5799 	u32 mask;
5800 
5801 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5802 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5803 			cik_select_se_sh(rdev, i, j);
5804 			for (k = 0; k < rdev->usec_timeout; k++) {
5805 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5806 					break;
5807 				udelay(1);
5808 			}
5809 		}
5810 	}
5811 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5812 
5813 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5814 	for (k = 0; k < rdev->usec_timeout; k++) {
5815 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5816 			break;
5817 		udelay(1);
5818 	}
5819 }
5820 
5821 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5822 {
5823 	u32 tmp;
5824 
5825 	tmp = RREG32(RLC_CNTL);
5826 	if (tmp != rlc)
5827 		WREG32(RLC_CNTL, rlc);
5828 }
5829 
5830 static u32 cik_halt_rlc(struct radeon_device *rdev)
5831 {
5832 	u32 data, orig;
5833 
5834 	orig = data = RREG32(RLC_CNTL);
5835 
5836 	if (data & RLC_ENABLE) {
5837 		u32 i;
5838 
5839 		data &= ~RLC_ENABLE;
5840 		WREG32(RLC_CNTL, data);
5841 
5842 		for (i = 0; i < rdev->usec_timeout; i++) {
5843 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5844 				break;
5845 			udelay(1);
5846 		}
5847 
5848 		cik_wait_for_rlc_serdes(rdev);
5849 	}
5850 
5851 	return orig;
5852 }
5853 
5854 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5855 {
5856 	u32 tmp, i, mask;
5857 
5858 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5859 	WREG32(RLC_GPR_REG2, tmp);
5860 
5861 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5862 	for (i = 0; i < rdev->usec_timeout; i++) {
5863 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5864 			break;
5865 		udelay(1);
5866 	}
5867 
5868 	for (i = 0; i < rdev->usec_timeout; i++) {
5869 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5870 			break;
5871 		udelay(1);
5872 	}
5873 }
5874 
5875 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5876 {
5877 	u32 tmp;
5878 
5879 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5880 	WREG32(RLC_GPR_REG2, tmp);
5881 }
5882 
5883 /**
5884  * cik_rlc_stop - stop the RLC ME
5885  *
5886  * @rdev: radeon_device pointer
5887  *
5888  * Halt the RLC ME (MicroEngine) (CIK).
5889  */
5890 static void cik_rlc_stop(struct radeon_device *rdev)
5891 {
5892 	WREG32(RLC_CNTL, 0);
5893 
5894 	cik_enable_gui_idle_interrupt(rdev, false);
5895 
5896 	cik_wait_for_rlc_serdes(rdev);
5897 }
5898 
5899 /**
5900  * cik_rlc_start - start the RLC ME
5901  *
5902  * @rdev: radeon_device pointer
5903  *
5904  * Unhalt the RLC ME (MicroEngine) (CIK).
5905  */
5906 static void cik_rlc_start(struct radeon_device *rdev)
5907 {
5908 	WREG32(RLC_CNTL, RLC_ENABLE);
5909 
5910 	cik_enable_gui_idle_interrupt(rdev, true);
5911 
5912 	udelay(50);
5913 }
5914 
5915 /**
5916  * cik_rlc_resume - setup the RLC hw
5917  *
5918  * @rdev: radeon_device pointer
5919  *
5920  * Initialize the RLC registers, load the ucode,
5921  * and start the RLC (CIK).
5922  * Returns 0 for success, -EINVAL if the ucode is not available.
5923  */
5924 static int cik_rlc_resume(struct radeon_device *rdev)
5925 {
5926 	u32 i, size, tmp;
5927 
5928 	if (!rdev->rlc_fw)
5929 		return -EINVAL;
5930 
5931 	cik_rlc_stop(rdev);
5932 
5933 	/* disable CG */
5934 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5935 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5936 
5937 	si_rlc_reset(rdev);
5938 
5939 	cik_init_pg(rdev);
5940 
5941 	cik_init_cg(rdev);
5942 
5943 	WREG32(RLC_LB_CNTR_INIT, 0);
5944 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5945 
5946 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5947 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5948 	WREG32(RLC_LB_PARAMS, 0x00600408);
5949 	WREG32(RLC_LB_CNTL, 0x80000004);
5950 
5951 	WREG32(RLC_MC_CNTL, 0);
5952 	WREG32(RLC_UCODE_CNTL, 0);
5953 
5954 	if (rdev->new_fw) {
5955 		const struct rlc_firmware_header_v1_0 *hdr =
5956 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5957 		const __le32 *fw_data = (const __le32 *)
5958 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5959 
5960 		radeon_ucode_print_rlc_hdr(&hdr->header);
5961 
5962 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5963 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5964 		for (i = 0; i < size; i++)
5965 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5966 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5967 	} else {
5968 		const __be32 *fw_data;
5969 
5970 		switch (rdev->family) {
5971 		case CHIP_BONAIRE:
5972 		case CHIP_HAWAII:
5973 		default:
5974 			size = BONAIRE_RLC_UCODE_SIZE;
5975 			break;
5976 		case CHIP_KAVERI:
5977 			size = KV_RLC_UCODE_SIZE;
5978 			break;
5979 		case CHIP_KABINI:
5980 			size = KB_RLC_UCODE_SIZE;
5981 			break;
5982 		case CHIP_MULLINS:
5983 			size = ML_RLC_UCODE_SIZE;
5984 			break;
5985 		}
5986 
5987 		fw_data = (const __be32 *)rdev->rlc_fw->data;
5988 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5989 		for (i = 0; i < size; i++)
5990 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5991 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5992 	}
5993 
5994 	/* XXX - find out what chips support lbpw */
5995 	cik_enable_lbpw(rdev, false);
5996 
5997 	if (rdev->family == CHIP_BONAIRE)
5998 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5999 
6000 	cik_rlc_start(rdev);
6001 
6002 	return 0;
6003 }
6004 
6005 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6006 {
6007 	u32 data, orig, tmp, tmp2;
6008 
6009 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6010 
6011 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6012 		cik_enable_gui_idle_interrupt(rdev, true);
6013 
6014 		tmp = cik_halt_rlc(rdev);
6015 
6016 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6017 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6018 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6019 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6020 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6021 
6022 		cik_update_rlc(rdev, tmp);
6023 
6024 		data |= CGCG_EN | CGLS_EN;
6025 	} else {
6026 		cik_enable_gui_idle_interrupt(rdev, false);
6027 
6028 		RREG32(CB_CGTT_SCLK_CTRL);
6029 		RREG32(CB_CGTT_SCLK_CTRL);
6030 		RREG32(CB_CGTT_SCLK_CTRL);
6031 		RREG32(CB_CGTT_SCLK_CTRL);
6032 
6033 		data &= ~(CGCG_EN | CGLS_EN);
6034 	}
6035 
6036 	if (orig != data)
6037 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6038 
6039 }
6040 
6041 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6042 {
6043 	u32 data, orig, tmp = 0;
6044 
6045 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6046 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6047 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6048 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6049 				data |= CP_MEM_LS_EN;
6050 				if (orig != data)
6051 					WREG32(CP_MEM_SLP_CNTL, data);
6052 			}
6053 		}
6054 
6055 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6056 		data |= 0x00000001;
6057 		data &= 0xfffffffd;
6058 		if (orig != data)
6059 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6060 
6061 		tmp = cik_halt_rlc(rdev);
6062 
6063 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6064 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6065 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6066 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6067 		WREG32(RLC_SERDES_WR_CTRL, data);
6068 
6069 		cik_update_rlc(rdev, tmp);
6070 
6071 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6072 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6073 			data &= ~SM_MODE_MASK;
6074 			data |= SM_MODE(0x2);
6075 			data |= SM_MODE_ENABLE;
6076 			data &= ~CGTS_OVERRIDE;
6077 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6078 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6079 				data &= ~CGTS_LS_OVERRIDE;
6080 			data &= ~ON_MONITOR_ADD_MASK;
6081 			data |= ON_MONITOR_ADD_EN;
6082 			data |= ON_MONITOR_ADD(0x96);
6083 			if (orig != data)
6084 				WREG32(CGTS_SM_CTRL_REG, data);
6085 		}
6086 	} else {
6087 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6088 		data |= 0x00000003;
6089 		if (orig != data)
6090 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6091 
6092 		data = RREG32(RLC_MEM_SLP_CNTL);
6093 		if (data & RLC_MEM_LS_EN) {
6094 			data &= ~RLC_MEM_LS_EN;
6095 			WREG32(RLC_MEM_SLP_CNTL, data);
6096 		}
6097 
6098 		data = RREG32(CP_MEM_SLP_CNTL);
6099 		if (data & CP_MEM_LS_EN) {
6100 			data &= ~CP_MEM_LS_EN;
6101 			WREG32(CP_MEM_SLP_CNTL, data);
6102 		}
6103 
6104 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6105 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6106 		if (orig != data)
6107 			WREG32(CGTS_SM_CTRL_REG, data);
6108 
6109 		tmp = cik_halt_rlc(rdev);
6110 
6111 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6112 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6113 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6114 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6115 		WREG32(RLC_SERDES_WR_CTRL, data);
6116 
6117 		cik_update_rlc(rdev, tmp);
6118 	}
6119 }
6120 
6121 static const u32 mc_cg_registers[] =
6122 {
6123 	MC_HUB_MISC_HUB_CG,
6124 	MC_HUB_MISC_SIP_CG,
6125 	MC_HUB_MISC_VM_CG,
6126 	MC_XPB_CLK_GAT,
6127 	ATC_MISC_CG,
6128 	MC_CITF_MISC_WR_CG,
6129 	MC_CITF_MISC_RD_CG,
6130 	MC_CITF_MISC_VM_CG,
6131 	VM_L2_CG,
6132 };
6133 
6134 static void cik_enable_mc_ls(struct radeon_device *rdev,
6135 			     bool enable)
6136 {
6137 	int i;
6138 	u32 orig, data;
6139 
6140 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6141 		orig = data = RREG32(mc_cg_registers[i]);
6142 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6143 			data |= MC_LS_ENABLE;
6144 		else
6145 			data &= ~MC_LS_ENABLE;
6146 		if (data != orig)
6147 			WREG32(mc_cg_registers[i], data);
6148 	}
6149 }
6150 
6151 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6152 			       bool enable)
6153 {
6154 	int i;
6155 	u32 orig, data;
6156 
6157 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6158 		orig = data = RREG32(mc_cg_registers[i]);
6159 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6160 			data |= MC_CG_ENABLE;
6161 		else
6162 			data &= ~MC_CG_ENABLE;
6163 		if (data != orig)
6164 			WREG32(mc_cg_registers[i], data);
6165 	}
6166 }
6167 
6168 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6169 				 bool enable)
6170 {
6171 	u32 orig, data;
6172 
6173 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6174 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6175 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6176 	} else {
6177 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6178 		data |= 0xff000000;
6179 		if (data != orig)
6180 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6181 
6182 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6183 		data |= 0xff000000;
6184 		if (data != orig)
6185 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6186 	}
6187 }
6188 
6189 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6190 				 bool enable)
6191 {
6192 	u32 orig, data;
6193 
6194 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6195 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6196 		data |= 0x100;
6197 		if (orig != data)
6198 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6199 
6200 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6201 		data |= 0x100;
6202 		if (orig != data)
6203 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6204 	} else {
6205 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6206 		data &= ~0x100;
6207 		if (orig != data)
6208 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6209 
6210 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6211 		data &= ~0x100;
6212 		if (orig != data)
6213 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6214 	}
6215 }
6216 
6217 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6218 				bool enable)
6219 {
6220 	u32 orig, data;
6221 
6222 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6223 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6224 		data = 0xfff;
6225 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6226 
6227 		orig = data = RREG32(UVD_CGC_CTRL);
6228 		data |= DCM;
6229 		if (orig != data)
6230 			WREG32(UVD_CGC_CTRL, data);
6231 	} else {
6232 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6233 		data &= ~0xfff;
6234 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6235 
6236 		orig = data = RREG32(UVD_CGC_CTRL);
6237 		data &= ~DCM;
6238 		if (orig != data)
6239 			WREG32(UVD_CGC_CTRL, data);
6240 	}
6241 }
6242 
6243 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6244 			       bool enable)
6245 {
6246 	u32 orig, data;
6247 
6248 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6249 
6250 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6251 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6252 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6253 	else
6254 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6255 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6256 
6257 	if (orig != data)
6258 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6259 }
6260 
6261 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6262 				bool enable)
6263 {
6264 	u32 orig, data;
6265 
6266 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6267 
6268 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6269 		data &= ~CLOCK_GATING_DIS;
6270 	else
6271 		data |= CLOCK_GATING_DIS;
6272 
6273 	if (orig != data)
6274 		WREG32(HDP_HOST_PATH_CNTL, data);
6275 }
6276 
6277 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6278 			      bool enable)
6279 {
6280 	u32 orig, data;
6281 
6282 	orig = data = RREG32(HDP_MEM_POWER_LS);
6283 
6284 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6285 		data |= HDP_LS_ENABLE;
6286 	else
6287 		data &= ~HDP_LS_ENABLE;
6288 
6289 	if (orig != data)
6290 		WREG32(HDP_MEM_POWER_LS, data);
6291 }
6292 
6293 void cik_update_cg(struct radeon_device *rdev,
6294 		   u32 block, bool enable)
6295 {
6296 
6297 	if (block & RADEON_CG_BLOCK_GFX) {
6298 		cik_enable_gui_idle_interrupt(rdev, false);
6299 		/* order matters! */
6300 		if (enable) {
6301 			cik_enable_mgcg(rdev, true);
6302 			cik_enable_cgcg(rdev, true);
6303 		} else {
6304 			cik_enable_cgcg(rdev, false);
6305 			cik_enable_mgcg(rdev, false);
6306 		}
6307 		cik_enable_gui_idle_interrupt(rdev, true);
6308 	}
6309 
6310 	if (block & RADEON_CG_BLOCK_MC) {
6311 		if (!(rdev->flags & RADEON_IS_IGP)) {
6312 			cik_enable_mc_mgcg(rdev, enable);
6313 			cik_enable_mc_ls(rdev, enable);
6314 		}
6315 	}
6316 
6317 	if (block & RADEON_CG_BLOCK_SDMA) {
6318 		cik_enable_sdma_mgcg(rdev, enable);
6319 		cik_enable_sdma_mgls(rdev, enable);
6320 	}
6321 
6322 	if (block & RADEON_CG_BLOCK_BIF) {
6323 		cik_enable_bif_mgls(rdev, enable);
6324 	}
6325 
6326 	if (block & RADEON_CG_BLOCK_UVD) {
6327 		if (rdev->has_uvd)
6328 			cik_enable_uvd_mgcg(rdev, enable);
6329 	}
6330 
6331 	if (block & RADEON_CG_BLOCK_HDP) {
6332 		cik_enable_hdp_mgcg(rdev, enable);
6333 		cik_enable_hdp_ls(rdev, enable);
6334 	}
6335 
6336 	if (block & RADEON_CG_BLOCK_VCE) {
6337 		vce_v2_0_enable_mgcg(rdev, enable);
6338 	}
6339 }
6340 
6341 static void cik_init_cg(struct radeon_device *rdev)
6342 {
6343 
6344 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6345 
6346 	if (rdev->has_uvd)
6347 		si_init_uvd_internal_cg(rdev);
6348 
6349 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6350 			     RADEON_CG_BLOCK_SDMA |
6351 			     RADEON_CG_BLOCK_BIF |
6352 			     RADEON_CG_BLOCK_UVD |
6353 			     RADEON_CG_BLOCK_HDP), true);
6354 }
6355 
6356 static void cik_fini_cg(struct radeon_device *rdev)
6357 {
6358 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6359 			     RADEON_CG_BLOCK_SDMA |
6360 			     RADEON_CG_BLOCK_BIF |
6361 			     RADEON_CG_BLOCK_UVD |
6362 			     RADEON_CG_BLOCK_HDP), false);
6363 
6364 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6365 }
6366 
6367 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6368 					  bool enable)
6369 {
6370 	u32 data, orig;
6371 
6372 	orig = data = RREG32(RLC_PG_CNTL);
6373 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6374 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6375 	else
6376 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6377 	if (orig != data)
6378 		WREG32(RLC_PG_CNTL, data);
6379 }
6380 
6381 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6382 					  bool enable)
6383 {
6384 	u32 data, orig;
6385 
6386 	orig = data = RREG32(RLC_PG_CNTL);
6387 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6388 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6389 	else
6390 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6391 	if (orig != data)
6392 		WREG32(RLC_PG_CNTL, data);
6393 }
6394 
6395 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6396 {
6397 	u32 data, orig;
6398 
6399 	orig = data = RREG32(RLC_PG_CNTL);
6400 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6401 		data &= ~DISABLE_CP_PG;
6402 	else
6403 		data |= DISABLE_CP_PG;
6404 	if (orig != data)
6405 		WREG32(RLC_PG_CNTL, data);
6406 }
6407 
6408 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6409 {
6410 	u32 data, orig;
6411 
6412 	orig = data = RREG32(RLC_PG_CNTL);
6413 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6414 		data &= ~DISABLE_GDS_PG;
6415 	else
6416 		data |= DISABLE_GDS_PG;
6417 	if (orig != data)
6418 		WREG32(RLC_PG_CNTL, data);
6419 }
6420 
6421 #define CP_ME_TABLE_SIZE    96
6422 #define CP_ME_TABLE_OFFSET  2048
6423 #define CP_MEC_TABLE_OFFSET 4096
6424 
6425 void cik_init_cp_pg_table(struct radeon_device *rdev)
6426 {
6427 	volatile u32 *dst_ptr;
6428 	int me, i, max_me = 4;
6429 	u32 bo_offset = 0;
6430 	u32 table_offset, table_size;
6431 
6432 	if (rdev->family == CHIP_KAVERI)
6433 		max_me = 5;
6434 
6435 	if (rdev->rlc.cp_table_ptr == NULL)
6436 		return;
6437 
6438 	/* write the cp table buffer */
6439 	dst_ptr = rdev->rlc.cp_table_ptr;
6440 	for (me = 0; me < max_me; me++) {
6441 		if (rdev->new_fw) {
6442 			const __le32 *fw_data;
6443 			const struct gfx_firmware_header_v1_0 *hdr;
6444 
6445 			if (me == 0) {
6446 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6447 				fw_data = (const __le32 *)
6448 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6449 				table_offset = le32_to_cpu(hdr->jt_offset);
6450 				table_size = le32_to_cpu(hdr->jt_size);
6451 			} else if (me == 1) {
6452 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6453 				fw_data = (const __le32 *)
6454 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6455 				table_offset = le32_to_cpu(hdr->jt_offset);
6456 				table_size = le32_to_cpu(hdr->jt_size);
6457 			} else if (me == 2) {
6458 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6459 				fw_data = (const __le32 *)
6460 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6461 				table_offset = le32_to_cpu(hdr->jt_offset);
6462 				table_size = le32_to_cpu(hdr->jt_size);
6463 			} else if (me == 3) {
6464 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6465 				fw_data = (const __le32 *)
6466 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6467 				table_offset = le32_to_cpu(hdr->jt_offset);
6468 				table_size = le32_to_cpu(hdr->jt_size);
6469 			} else {
6470 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6471 				fw_data = (const __le32 *)
6472 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6473 				table_offset = le32_to_cpu(hdr->jt_offset);
6474 				table_size = le32_to_cpu(hdr->jt_size);
6475 			}
6476 
6477 			for (i = 0; i < table_size; i ++) {
6478 				dst_ptr[bo_offset + i] =
6479 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6480 			}
6481 			bo_offset += table_size;
6482 		} else {
6483 			const __be32 *fw_data;
6484 			table_size = CP_ME_TABLE_SIZE;
6485 
6486 			if (me == 0) {
6487 				fw_data = (const __be32 *)rdev->ce_fw->data;
6488 				table_offset = CP_ME_TABLE_OFFSET;
6489 			} else if (me == 1) {
6490 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6491 				table_offset = CP_ME_TABLE_OFFSET;
6492 			} else if (me == 2) {
6493 				fw_data = (const __be32 *)rdev->me_fw->data;
6494 				table_offset = CP_ME_TABLE_OFFSET;
6495 			} else {
6496 				fw_data = (const __be32 *)rdev->mec_fw->data;
6497 				table_offset = CP_MEC_TABLE_OFFSET;
6498 			}
6499 
6500 			for (i = 0; i < table_size; i ++) {
6501 				dst_ptr[bo_offset + i] =
6502 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6503 			}
6504 			bo_offset += table_size;
6505 		}
6506 	}
6507 }
6508 
6509 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6510 				bool enable)
6511 {
6512 	u32 data, orig;
6513 
6514 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6515 		orig = data = RREG32(RLC_PG_CNTL);
6516 		data |= GFX_PG_ENABLE;
6517 		if (orig != data)
6518 			WREG32(RLC_PG_CNTL, data);
6519 
6520 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6521 		data |= AUTO_PG_EN;
6522 		if (orig != data)
6523 			WREG32(RLC_AUTO_PG_CTRL, data);
6524 	} else {
6525 		orig = data = RREG32(RLC_PG_CNTL);
6526 		data &= ~GFX_PG_ENABLE;
6527 		if (orig != data)
6528 			WREG32(RLC_PG_CNTL, data);
6529 
6530 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6531 		data &= ~AUTO_PG_EN;
6532 		if (orig != data)
6533 			WREG32(RLC_AUTO_PG_CTRL, data);
6534 
6535 		data = RREG32(DB_RENDER_CONTROL);
6536 	}
6537 }
6538 
6539 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6540 {
6541 	u32 mask = 0, tmp, tmp1;
6542 	int i;
6543 
6544 	cik_select_se_sh(rdev, se, sh);
6545 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6546 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6547 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6548 
6549 	tmp &= 0xffff0000;
6550 
6551 	tmp |= tmp1;
6552 	tmp >>= 16;
6553 
6554 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6555 		mask <<= 1;
6556 		mask |= 1;
6557 	}
6558 
6559 	return (~tmp) & mask;
6560 }
6561 
6562 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6563 {
6564 	u32 i, j, k, active_cu_number = 0;
6565 	u32 mask, counter, cu_bitmap;
6566 	u32 tmp = 0;
6567 
6568 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6569 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6570 			mask = 1;
6571 			cu_bitmap = 0;
6572 			counter = 0;
6573 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6574 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6575 					if (counter < 2)
6576 						cu_bitmap |= mask;
6577 					counter ++;
6578 				}
6579 				mask <<= 1;
6580 			}
6581 
6582 			active_cu_number += counter;
6583 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6584 		}
6585 	}
6586 
6587 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6588 
6589 	tmp = RREG32(RLC_MAX_PG_CU);
6590 	tmp &= ~MAX_PU_CU_MASK;
6591 	tmp |= MAX_PU_CU(active_cu_number);
6592 	WREG32(RLC_MAX_PG_CU, tmp);
6593 }
6594 
6595 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6596 				       bool enable)
6597 {
6598 	u32 data, orig;
6599 
6600 	orig = data = RREG32(RLC_PG_CNTL);
6601 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6602 		data |= STATIC_PER_CU_PG_ENABLE;
6603 	else
6604 		data &= ~STATIC_PER_CU_PG_ENABLE;
6605 	if (orig != data)
6606 		WREG32(RLC_PG_CNTL, data);
6607 }
6608 
6609 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6610 					bool enable)
6611 {
6612 	u32 data, orig;
6613 
6614 	orig = data = RREG32(RLC_PG_CNTL);
6615 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6616 		data |= DYN_PER_CU_PG_ENABLE;
6617 	else
6618 		data &= ~DYN_PER_CU_PG_ENABLE;
6619 	if (orig != data)
6620 		WREG32(RLC_PG_CNTL, data);
6621 }
6622 
6623 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6624 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6625 
6626 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6627 {
6628 	u32 data, orig;
6629 	u32 i;
6630 
6631 	if (rdev->rlc.cs_data) {
6632 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6633 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6634 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6635 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6636 	} else {
6637 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6638 		for (i = 0; i < 3; i++)
6639 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6640 	}
6641 	if (rdev->rlc.reg_list) {
6642 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6643 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6644 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6645 	}
6646 
6647 	orig = data = RREG32(RLC_PG_CNTL);
6648 	data |= GFX_PG_SRC;
6649 	if (orig != data)
6650 		WREG32(RLC_PG_CNTL, data);
6651 
6652 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6653 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6654 
6655 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6656 	data &= ~IDLE_POLL_COUNT_MASK;
6657 	data |= IDLE_POLL_COUNT(0x60);
6658 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6659 
6660 	data = 0x10101010;
6661 	WREG32(RLC_PG_DELAY, data);
6662 
6663 	data = RREG32(RLC_PG_DELAY_2);
6664 	data &= ~0xff;
6665 	data |= 0x3;
6666 	WREG32(RLC_PG_DELAY_2, data);
6667 
6668 	data = RREG32(RLC_AUTO_PG_CTRL);
6669 	data &= ~GRBM_REG_SGIT_MASK;
6670 	data |= GRBM_REG_SGIT(0x700);
6671 	WREG32(RLC_AUTO_PG_CTRL, data);
6672 
6673 }
6674 
/*
 * Forward the same enable/disable request to the three gfx power gating
 * helpers, in this order: cik_enable_gfx_cgpg(), then the static and
 * the dynamic per-CU variants.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6681 
6682 u32 cik_get_csb_size(struct radeon_device *rdev)
6683 {
6684 	u32 count = 0;
6685 	const struct cs_section_def *sect = NULL;
6686 	const struct cs_extent_def *ext = NULL;
6687 
6688 	if (rdev->rlc.cs_data == NULL)
6689 		return 0;
6690 
6691 	/* begin clear state */
6692 	count += 2;
6693 	/* context control state */
6694 	count += 3;
6695 
6696 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6697 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6698 			if (sect->id == SECT_CONTEXT)
6699 				count += 2 + ext->reg_count;
6700 			else
6701 				return 0;
6702 		}
6703 	}
6704 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6705 	count += 4;
6706 	/* end clear state */
6707 	count += 2;
6708 	/* clear state */
6709 	count += 2;
6710 
6711 	return count;
6712 }
6713 
6714 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6715 {
6716 	u32 count = 0, i;
6717 	const struct cs_section_def *sect = NULL;
6718 	const struct cs_extent_def *ext = NULL;
6719 
6720 	if (rdev->rlc.cs_data == NULL)
6721 		return;
6722 	if (buffer == NULL)
6723 		return;
6724 
6725 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6726 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6727 
6728 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6729 	buffer[count++] = cpu_to_le32(0x80000000);
6730 	buffer[count++] = cpu_to_le32(0x80000000);
6731 
6732 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6733 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6734 			if (sect->id == SECT_CONTEXT) {
6735 				buffer[count++] =
6736 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6737 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6738 				for (i = 0; i < ext->reg_count; i++)
6739 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6740 			} else {
6741 				return;
6742 			}
6743 		}
6744 	}
6745 
6746 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6747 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6748 	switch (rdev->family) {
6749 	case CHIP_BONAIRE:
6750 		buffer[count++] = cpu_to_le32(0x16000012);
6751 		buffer[count++] = cpu_to_le32(0x00000000);
6752 		break;
6753 	case CHIP_KAVERI:
6754 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6755 		buffer[count++] = cpu_to_le32(0x00000000);
6756 		break;
6757 	case CHIP_KABINI:
6758 	case CHIP_MULLINS:
6759 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6760 		buffer[count++] = cpu_to_le32(0x00000000);
6761 		break;
6762 	case CHIP_HAWAII:
6763 		buffer[count++] = cpu_to_le32(0x3a00161a);
6764 		buffer[count++] = cpu_to_le32(0x0000002e);
6765 		break;
6766 	default:
6767 		buffer[count++] = cpu_to_le32(0x00000000);
6768 		buffer[count++] = cpu_to_le32(0x00000000);
6769 		break;
6770 	}
6771 
6772 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6773 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6774 
6775 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6776 	buffer[count++] = cpu_to_le32(0);
6777 }
6778 
6779 static void cik_init_pg(struct radeon_device *rdev)
6780 {
6781 	if (rdev->pg_flags) {
6782 		cik_enable_sck_slowdown_on_pu(rdev, true);
6783 		cik_enable_sck_slowdown_on_pd(rdev, true);
6784 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6785 			cik_init_gfx_cgpg(rdev);
6786 			cik_enable_cp_pg(rdev, true);
6787 			cik_enable_gds_pg(rdev, true);
6788 		}
6789 		cik_init_ao_cu_mask(rdev);
6790 		cik_update_gfx_pg(rdev, true);
6791 	}
6792 }
6793 
6794 static void cik_fini_pg(struct radeon_device *rdev)
6795 {
6796 	if (rdev->pg_flags) {
6797 		cik_update_gfx_pg(rdev, false);
6798 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6799 			cik_enable_cp_pg(rdev, false);
6800 			cik_enable_gds_pg(rdev, false);
6801 		}
6802 	}
6803 }
6804 
6805 /*
6806  * Interrupts
6807  * Starting with r6xx, interrupts are handled via a ring buffer.
6808  * Ring buffers are areas of GPU accessible memory that the GPU
6809  * writes interrupt vectors into and the host reads vectors out of.
6810  * There is a rptr (read pointer) that determines where the
6811  * host is currently reading, and a wptr (write pointer)
6812  * which determines where the GPU has written.  When the
6813  * pointers are equal, the ring is idle.  When the GPU
6814  * writes vectors to the ring buffer, it increments the
6815  * wptr.  When there is an interrupt, the host then starts
6816  * fetching commands and processing them until the pointers are
6817  * equal again at which point it updates the rptr.
6818  */
6819 
6820 /**
6821  * cik_enable_interrupts - Enable the interrupt ring buffer
6822  *
6823  * @rdev: radeon_device pointer
6824  *
6825  * Enable the interrupt ring buffer (CIK).
6826  */
6827 static void cik_enable_interrupts(struct radeon_device *rdev)
6828 {
6829 	u32 ih_cntl = RREG32(IH_CNTL);
6830 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6831 
6832 	ih_cntl |= ENABLE_INTR;
6833 	ih_rb_cntl |= IH_RB_ENABLE;
6834 	WREG32(IH_CNTL, ih_cntl);
6835 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6836 	rdev->ih.enabled = true;
6837 }
6838 
6839 /**
6840  * cik_disable_interrupts - Disable the interrupt ring buffer
6841  *
6842  * @rdev: radeon_device pointer
6843  *
6844  * Disable the interrupt ring buffer (CIK).
6845  */
6846 static void cik_disable_interrupts(struct radeon_device *rdev)
6847 {
6848 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6849 	u32 ih_cntl = RREG32(IH_CNTL);
6850 
6851 	ih_rb_cntl &= ~IH_RB_ENABLE;
6852 	ih_cntl &= ~ENABLE_INTR;
6853 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6854 	WREG32(IH_CNTL, ih_cntl);
6855 	/* set rptr, wptr to 0 */
6856 	WREG32(IH_RB_RPTR, 0);
6857 	WREG32(IH_RB_WPTR, 0);
6858 	rdev->ih.enabled = false;
6859 	rdev->ih.rptr = 0;
6860 }
6861 
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 * Registers that carry other state are read back first so that only
 * the enable bits are dropped; the remaining ones are written as 0.
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enable bits */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear TRAP_ENABLE on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: every pipe of ME1 and ME2 */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc.; crtc 0/1 are written unconditionally */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: only the polarity bit of each pin is kept */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6938 
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * disable interrupts, resume the RLC, program the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc; the IH ring is released again if this fails */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base (address >> 8) and log2 of the ring size in dwords */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7020 
7021 /**
7022  * cik_irq_set - enable/disable interrupt sources
7023  *
7024  * @rdev: radeon_device pointer
7025  *
7026  * Enable interrupt sources on the GPU (vblanks, hpd,
7027  * etc.) (CIK).
7028  * Returns 0 for success, errors for failure.
7029  */
7030 int cik_irq_set(struct radeon_device *rdev)
7031 {
7032 	u32 cp_int_cntl;
7033 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7034 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7035 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7036 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7037 	u32 grbm_int_cntl = 0;
7038 	u32 dma_cntl, dma_cntl1;
7039 
7040 	if (!rdev->irq.installed) {
7041 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7042 		return -EINVAL;
7043 	}
7044 	/* don't enable anything if the ih is disabled */
7045 	if (!rdev->ih.enabled) {
7046 		cik_disable_interrupts(rdev);
7047 		/* force the active interrupt state to all disabled */
7048 		cik_disable_interrupt_state(rdev);
7049 		return 0;
7050 	}
7051 
7052 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7053 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7054 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7055 
7056 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7057 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7058 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7059 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7060 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7061 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7062 
7063 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7064 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7065 
7066 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7067 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7068 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7069 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7070 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7071 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7072 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7073 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7074 
7075 	/* enable CP interrupts on all rings */
7076 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7077 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7078 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7079 	}
7080 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7081 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7082 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7083 		if (ring->me == 1) {
7084 			switch (ring->pipe) {
7085 			case 0:
7086 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7087 				break;
7088 			case 1:
7089 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7090 				break;
7091 			case 2:
7092 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7093 				break;
7094 			case 3:
7095 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7096 				break;
7097 			default:
7098 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7099 				break;
7100 			}
7101 		} else if (ring->me == 2) {
7102 			switch (ring->pipe) {
7103 			case 0:
7104 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7105 				break;
7106 			case 1:
7107 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7108 				break;
7109 			case 2:
7110 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7111 				break;
7112 			case 3:
7113 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7114 				break;
7115 			default:
7116 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7117 				break;
7118 			}
7119 		} else {
7120 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7121 		}
7122 	}
7123 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7124 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7125 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7126 		if (ring->me == 1) {
7127 			switch (ring->pipe) {
7128 			case 0:
7129 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7130 				break;
7131 			case 1:
7132 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7133 				break;
7134 			case 2:
7135 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7136 				break;
7137 			case 3:
7138 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7139 				break;
7140 			default:
7141 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7142 				break;
7143 			}
7144 		} else if (ring->me == 2) {
7145 			switch (ring->pipe) {
7146 			case 0:
7147 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7148 				break;
7149 			case 1:
7150 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7151 				break;
7152 			case 2:
7153 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7154 				break;
7155 			case 3:
7156 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7157 				break;
7158 			default:
7159 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7160 				break;
7161 			}
7162 		} else {
7163 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7164 		}
7165 	}
7166 
7167 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7168 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7169 		dma_cntl |= TRAP_ENABLE;
7170 	}
7171 
7172 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7173 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7174 		dma_cntl1 |= TRAP_ENABLE;
7175 	}
7176 
7177 	if (rdev->irq.crtc_vblank_int[0] ||
7178 	    atomic_read(&rdev->irq.pflip[0])) {
7179 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7180 		crtc1 |= VBLANK_INTERRUPT_MASK;
7181 	}
7182 	if (rdev->irq.crtc_vblank_int[1] ||
7183 	    atomic_read(&rdev->irq.pflip[1])) {
7184 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7185 		crtc2 |= VBLANK_INTERRUPT_MASK;
7186 	}
7187 	if (rdev->irq.crtc_vblank_int[2] ||
7188 	    atomic_read(&rdev->irq.pflip[2])) {
7189 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7190 		crtc3 |= VBLANK_INTERRUPT_MASK;
7191 	}
7192 	if (rdev->irq.crtc_vblank_int[3] ||
7193 	    atomic_read(&rdev->irq.pflip[3])) {
7194 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7195 		crtc4 |= VBLANK_INTERRUPT_MASK;
7196 	}
7197 	if (rdev->irq.crtc_vblank_int[4] ||
7198 	    atomic_read(&rdev->irq.pflip[4])) {
7199 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7200 		crtc5 |= VBLANK_INTERRUPT_MASK;
7201 	}
7202 	if (rdev->irq.crtc_vblank_int[5] ||
7203 	    atomic_read(&rdev->irq.pflip[5])) {
7204 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7205 		crtc6 |= VBLANK_INTERRUPT_MASK;
7206 	}
7207 	if (rdev->irq.hpd[0]) {
7208 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7209 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7210 	}
7211 	if (rdev->irq.hpd[1]) {
7212 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7213 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7214 	}
7215 	if (rdev->irq.hpd[2]) {
7216 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7217 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218 	}
7219 	if (rdev->irq.hpd[3]) {
7220 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7221 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222 	}
7223 	if (rdev->irq.hpd[4]) {
7224 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7225 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226 	}
7227 	if (rdev->irq.hpd[5]) {
7228 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7229 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7230 	}
7231 
7232 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7233 
7234 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7235 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7236 
7237 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7238 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7239 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7240 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7241 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7242 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7243 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7244 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7245 
7246 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7247 
7248 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7249 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7250 	if (rdev->num_crtc >= 4) {
7251 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7252 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7253 	}
7254 	if (rdev->num_crtc >= 6) {
7255 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7256 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7257 	}
7258 
7259 	if (rdev->num_crtc >= 2) {
7260 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7261 		       GRPH_PFLIP_INT_MASK);
7262 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7263 		       GRPH_PFLIP_INT_MASK);
7264 	}
7265 	if (rdev->num_crtc >= 4) {
7266 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7267 		       GRPH_PFLIP_INT_MASK);
7268 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7269 		       GRPH_PFLIP_INT_MASK);
7270 	}
7271 	if (rdev->num_crtc >= 6) {
7272 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7273 		       GRPH_PFLIP_INT_MASK);
7274 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7275 		       GRPH_PFLIP_INT_MASK);
7276 	}
7277 
7278 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7279 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7280 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7281 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7282 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7283 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7284 
7285 	/* posting read */
7286 	RREG32(SRBM_STATUS);
7287 
7288 	return 0;
7289 }
7290 
/**
 * cik_irq_ack - ack interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupts sources are sw
 * generated and do not require an explicit ack.
 *
 * The display and page flip status registers are first latched into
 * rdev->irq.stat_regs.cik; every source found pending in that
 * snapshot is then acknowledged.
 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* snapshot the display interrupt status registers */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	/* snapshot the per-crtc page flip status; crtc 0/1 are always read */
	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* crtc 0/1: ack page flip, vblank and vline */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* crtc 2/3 */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* crtc 4/5 */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* hotplug: ack by setting DC_HPDx_INT_ACK in the pin's control register */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	/* hotplug RX interrupts: same registers, DC_HPDx_RX_INT_ACK bit */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
7439 
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce interrupts on the hw (CIK): call cik_disable_interrupts(),
 * wait 1 ms, acknowledge whatever is still pending through
 * cik_irq_ack() and finally call cik_disable_interrupt_state().
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);

	/* give pending interrupts time to show up, then acknowledge them */
	mdelay(1);
	cik_irq_ack(rdev);

	cik_disable_interrupt_state(rdev);
}
7455 
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts (via cik_irq_disable()) and then stop the RLC (CIK).
 * Used for suspend; also reused by cik_irq_fini() on driver unload.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7469 
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Driver-unload path (CIK): run the suspend sequence
 * (cik_irq_suspend()) to disable interrupts on the hw,
 * then release the IH ring buffer with r600_ih_ring_fini().
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw first ... */
	cik_irq_suspend(rdev);

	/* ... then free the IH ring buffer */
	r600_ih_ring_fini(rdev);
}
7484 
7485 /**
7486  * cik_get_ih_wptr - get the IH ring buffer wptr
7487  *
7488  * @rdev: radeon_device pointer
7489  *
7490  * Get the IH ring buffer wptr from either the register
7491  * or the writeback memory buffer (CIK).  Also check for
7492  * ring buffer overflow and deal with it.
7493  * Used by cik_irq_process().
7494  * Returns the value of the wptr.
7495  */
7496 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7497 {
7498 	u32 wptr, tmp;
7499 
7500 	if (rdev->wb.enabled)
7501 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7502 	else
7503 		wptr = RREG32(IH_RB_WPTR);
7504 
7505 	if (wptr & RB_OVERFLOW) {
7506 		wptr &= ~RB_OVERFLOW;
7507 		/* When a ring buffer overflow happen start parsing interrupt
7508 		 * from the last not overwritten vector (wptr + 16). Hopefully
7509 		 * this should allow us to catchup.
7510 		 */
7511 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7512 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7513 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7514 		tmp = RREG32(IH_RB_CNTL);
7515 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7516 		WREG32(IH_RB_CNTL, tmp);
7517 	}
7518 	return (wptr & rdev->ih.ptr_mask);
7519 }
7520 
7521 /*        CIK IV Ring
7522  * Each IV ring entry is 128 bits:
7523  * [7:0]    - interrupt source id
7524  * [31:8]   - reserved
7525  * [59:32]  - interrupt source data
7526  * [63:60]  - reserved
7527  * [71:64]  - RINGID
7528  *            CP:
7529  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7530  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7531  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7532  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7533  *            PIPE_ID - ME0 0=3D
7534  *                    - ME1&2 compute dispatcher (4 pipes each)
7535  *            SDMA:
7536  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7537  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7538  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7539  * [79:72]  - VMID
7540  * [95:80]  - PASID
7541  * [127:96] - reserved
7542  */
7543 /**
7544  * cik_irq_process - interrupt handler
7545  *
7546  * @rdev: radeon_device pointer
7547  *
7548  * Interrupt hander (CIK).  Walk the IH ring,
7549  * ack interrupts and schedule work to handle
7550  * interrupt events.
7551  * Returns irq process return code.
7552  */
7553 int cik_irq_process(struct radeon_device *rdev)
7554 {
7555 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7556 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7557 	u32 wptr;
7558 	u32 rptr;
7559 	u32 src_id, src_data, ring_id;
7560 	u8 me_id, pipe_id, queue_id;
7561 	u32 ring_index;
7562 	bool queue_hotplug = false;
7563 	bool queue_dp = false;
7564 	bool queue_reset = false;
7565 	u32 addr, status, mc_client;
7566 	bool queue_thermal = false;
7567 
7568 	if (!rdev->ih.enabled || rdev->shutdown)
7569 		return IRQ_NONE;
7570 
7571 	wptr = cik_get_ih_wptr(rdev);
7572 
7573 restart_ih:
7574 	/* is somebody else already processing irqs? */
7575 	if (atomic_xchg(&rdev->ih.lock, 1))
7576 		return IRQ_NONE;
7577 
7578 	rptr = rdev->ih.rptr;
7579 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7580 
7581 	/* Order reading of wptr vs. reading of IH ring data */
7582 	rmb();
7583 
7584 	/* display interrupts */
7585 	cik_irq_ack(rdev);
7586 
7587 	while (rptr != wptr) {
7588 		/* wptr/rptr are in bytes! */
7589 		ring_index = rptr / 4;
7590 
7591 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7592 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7593 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7594 
7595 		switch (src_id) {
7596 		case 1: /* D1 vblank/vline */
7597 			switch (src_data) {
7598 			case 0: /* D1 vblank */
7599 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7600 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601 
7602 				if (rdev->irq.crtc_vblank_int[0]) {
7603 					drm_handle_vblank(rdev->ddev, 0);
7604 					rdev->pm.vblank_sync = true;
7605 					wake_up(&rdev->irq.vblank_queue);
7606 				}
7607 				if (atomic_read(&rdev->irq.pflip[0]))
7608 					radeon_crtc_handle_vblank(rdev, 0);
7609 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7610 				DRM_DEBUG("IH: D1 vblank\n");
7611 
7612 				break;
7613 			case 1: /* D1 vline */
7614 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7615 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616 
7617 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7618 				DRM_DEBUG("IH: D1 vline\n");
7619 
7620 				break;
7621 			default:
7622 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7623 				break;
7624 			}
7625 			break;
7626 		case 2: /* D2 vblank/vline */
7627 			switch (src_data) {
7628 			case 0: /* D2 vblank */
7629 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7630 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631 
7632 				if (rdev->irq.crtc_vblank_int[1]) {
7633 					drm_handle_vblank(rdev->ddev, 1);
7634 					rdev->pm.vblank_sync = true;
7635 					wake_up(&rdev->irq.vblank_queue);
7636 				}
7637 				if (atomic_read(&rdev->irq.pflip[1]))
7638 					radeon_crtc_handle_vblank(rdev, 1);
7639 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7640 				DRM_DEBUG("IH: D2 vblank\n");
7641 
7642 				break;
7643 			case 1: /* D2 vline */
7644 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7645 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646 
7647 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7648 				DRM_DEBUG("IH: D2 vline\n");
7649 
7650 				break;
7651 			default:
7652 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7653 				break;
7654 			}
7655 			break;
7656 		case 3: /* D3 vblank/vline */
7657 			switch (src_data) {
7658 			case 0: /* D3 vblank */
7659 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7660 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661 
7662 				if (rdev->irq.crtc_vblank_int[2]) {
7663 					drm_handle_vblank(rdev->ddev, 2);
7664 					rdev->pm.vblank_sync = true;
7665 					wake_up(&rdev->irq.vblank_queue);
7666 				}
7667 				if (atomic_read(&rdev->irq.pflip[2]))
7668 					radeon_crtc_handle_vblank(rdev, 2);
7669 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7670 				DRM_DEBUG("IH: D3 vblank\n");
7671 
7672 				break;
7673 			case 1: /* D3 vline */
7674 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7675 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676 
7677 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7678 				DRM_DEBUG("IH: D3 vline\n");
7679 
7680 				break;
7681 			default:
7682 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7683 				break;
7684 			}
7685 			break;
7686 		case 4: /* D4 vblank/vline */
7687 			switch (src_data) {
7688 			case 0: /* D4 vblank */
7689 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7690 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691 
7692 				if (rdev->irq.crtc_vblank_int[3]) {
7693 					drm_handle_vblank(rdev->ddev, 3);
7694 					rdev->pm.vblank_sync = true;
7695 					wake_up(&rdev->irq.vblank_queue);
7696 				}
7697 				if (atomic_read(&rdev->irq.pflip[3]))
7698 					radeon_crtc_handle_vblank(rdev, 3);
7699 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7700 				DRM_DEBUG("IH: D4 vblank\n");
7701 
7702 				break;
7703 			case 1: /* D4 vline */
7704 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7705 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706 
7707 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7708 				DRM_DEBUG("IH: D4 vline\n");
7709 
7710 				break;
7711 			default:
7712 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7713 				break;
7714 			}
7715 			break;
7716 		case 5: /* D5 vblank/vline */
7717 			switch (src_data) {
7718 			case 0: /* D5 vblank */
7719 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7720 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721 
7722 				if (rdev->irq.crtc_vblank_int[4]) {
7723 					drm_handle_vblank(rdev->ddev, 4);
7724 					rdev->pm.vblank_sync = true;
7725 					wake_up(&rdev->irq.vblank_queue);
7726 				}
7727 				if (atomic_read(&rdev->irq.pflip[4]))
7728 					radeon_crtc_handle_vblank(rdev, 4);
7729 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7730 				DRM_DEBUG("IH: D5 vblank\n");
7731 
7732 				break;
7733 			case 1: /* D5 vline */
7734 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7735 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736 
7737 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7738 				DRM_DEBUG("IH: D5 vline\n");
7739 
7740 				break;
7741 			default:
7742 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7743 				break;
7744 			}
7745 			break;
7746 		case 6: /* D6 vblank/vline */
7747 			switch (src_data) {
7748 			case 0: /* D6 vblank */
7749 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7750 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751 
7752 				if (rdev->irq.crtc_vblank_int[5]) {
7753 					drm_handle_vblank(rdev->ddev, 5);
7754 					rdev->pm.vblank_sync = true;
7755 					wake_up(&rdev->irq.vblank_queue);
7756 				}
7757 				if (atomic_read(&rdev->irq.pflip[5]))
7758 					radeon_crtc_handle_vblank(rdev, 5);
7759 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7760 				DRM_DEBUG("IH: D6 vblank\n");
7761 
7762 				break;
7763 			case 1: /* D6 vline */
7764 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7765 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7766 
7767 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7768 				DRM_DEBUG("IH: D6 vline\n");
7769 
7770 				break;
7771 			default:
7772 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7773 				break;
7774 			}
7775 			break;
7776 		case 8: /* D1 page flip */
7777 		case 10: /* D2 page flip */
7778 		case 12: /* D3 page flip */
7779 		case 14: /* D4 page flip */
7780 		case 16: /* D5 page flip */
7781 		case 18: /* D6 page flip */
7782 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7783 			if (radeon_use_pflipirq > 0)
7784 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7785 			break;
7786 		case 42: /* HPD hotplug */
7787 			switch (src_data) {
7788 			case 0:
7789 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7790 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7791 
7792 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7793 				queue_hotplug = true;
7794 				DRM_DEBUG("IH: HPD1\n");
7795 
7796 				break;
7797 			case 1:
7798 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7799 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7800 
7801 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7802 				queue_hotplug = true;
7803 				DRM_DEBUG("IH: HPD2\n");
7804 
7805 				break;
7806 			case 2:
7807 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7808 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809 
7810 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7811 				queue_hotplug = true;
7812 				DRM_DEBUG("IH: HPD3\n");
7813 
7814 				break;
7815 			case 3:
7816 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7817 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818 
7819 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7820 				queue_hotplug = true;
7821 				DRM_DEBUG("IH: HPD4\n");
7822 
7823 				break;
7824 			case 4:
7825 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7826 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827 
7828 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7829 				queue_hotplug = true;
7830 				DRM_DEBUG("IH: HPD5\n");
7831 
7832 				break;
7833 			case 5:
7834 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7835 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836 
7837 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7838 				queue_hotplug = true;
7839 				DRM_DEBUG("IH: HPD6\n");
7840 
7841 				break;
7842 			case 6:
7843 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7844 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845 
7846 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7847 				queue_dp = true;
7848 				DRM_DEBUG("IH: HPD_RX 1\n");
7849 
7850 				break;
7851 			case 7:
7852 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7853 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854 
7855 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7856 				queue_dp = true;
7857 				DRM_DEBUG("IH: HPD_RX 2\n");
7858 
7859 				break;
7860 			case 8:
7861 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7862 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7863 
7864 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7865 				queue_dp = true;
7866 				DRM_DEBUG("IH: HPD_RX 3\n");
7867 
7868 				break;
7869 			case 9:
7870 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7871 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7872 
7873 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7874 				queue_dp = true;
7875 				DRM_DEBUG("IH: HPD_RX 4\n");
7876 
7877 				break;
7878 			case 10:
7879 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7880 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7881 
7882 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7883 				queue_dp = true;
7884 				DRM_DEBUG("IH: HPD_RX 5\n");
7885 
7886 				break;
7887 			case 11:
7888 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7889 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7890 
7891 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7892 				queue_dp = true;
7893 				DRM_DEBUG("IH: HPD_RX 6\n");
7894 
7895 				break;
7896 			default:
7897 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7898 				break;
7899 			}
7900 			break;
7901 		case 96:
7902 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7903 			WREG32(SRBM_INT_ACK, 0x1);
7904 			break;
7905 		case 124: /* UVD */
7906 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7907 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7908 			break;
7909 		case 146:
7910 		case 147:
7911 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7912 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7913 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7914 			/* reset addr and status */
7915 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7916 			if (addr == 0x0 && status == 0x0)
7917 				break;
7918 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7919 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7920 				addr);
7921 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7922 				status);
7923 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7924 			break;
7925 		case 167: /* VCE */
7926 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7927 			switch (src_data) {
7928 			case 0:
7929 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7930 				break;
7931 			case 1:
7932 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7933 				break;
7934 			default:
7935 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7936 				break;
7937 			}
7938 			break;
7939 		case 176: /* GFX RB CP_INT */
7940 		case 177: /* GFX IB CP_INT */
7941 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7942 			break;
7943 		case 181: /* CP EOP event */
7944 			DRM_DEBUG("IH: CP EOP\n");
7945 			/* XXX check the bitfield order! */
7946 			me_id = (ring_id & 0x60) >> 5;
7947 			pipe_id = (ring_id & 0x18) >> 3;
7948 			queue_id = (ring_id & 0x7) >> 0;
7949 			switch (me_id) {
7950 			case 0:
7951 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7952 				break;
7953 			case 1:
7954 			case 2:
7955 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7956 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7957 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7958 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7959 				break;
7960 			}
7961 			break;
7962 		case 184: /* CP Privileged reg access */
7963 			DRM_ERROR("Illegal register access in command stream\n");
7964 			/* XXX check the bitfield order! */
7965 			me_id = (ring_id & 0x60) >> 5;
7966 			pipe_id = (ring_id & 0x18) >> 3;
7967 			queue_id = (ring_id & 0x7) >> 0;
7968 			switch (me_id) {
7969 			case 0:
7970 				/* This results in a full GPU reset, but all we need to do is soft
7971 				 * reset the CP for gfx
7972 				 */
7973 				queue_reset = true;
7974 				break;
7975 			case 1:
7976 				/* XXX compute */
7977 				queue_reset = true;
7978 				break;
7979 			case 2:
7980 				/* XXX compute */
7981 				queue_reset = true;
7982 				break;
7983 			}
7984 			break;
7985 		case 185: /* CP Privileged inst */
7986 			DRM_ERROR("Illegal instruction in command stream\n");
7987 			/* XXX check the bitfield order! */
7988 			me_id = (ring_id & 0x60) >> 5;
7989 			pipe_id = (ring_id & 0x18) >> 3;
7990 			queue_id = (ring_id & 0x7) >> 0;
7991 			switch (me_id) {
7992 			case 0:
7993 				/* This results in a full GPU reset, but all we need to do is soft
7994 				 * reset the CP for gfx
7995 				 */
7996 				queue_reset = true;
7997 				break;
7998 			case 1:
7999 				/* XXX compute */
8000 				queue_reset = true;
8001 				break;
8002 			case 2:
8003 				/* XXX compute */
8004 				queue_reset = true;
8005 				break;
8006 			}
8007 			break;
8008 		case 224: /* SDMA trap event */
8009 			/* XXX check the bitfield order! */
8010 			me_id = (ring_id & 0x3) >> 0;
8011 			queue_id = (ring_id & 0xc) >> 2;
8012 			DRM_DEBUG("IH: SDMA trap\n");
8013 			switch (me_id) {
8014 			case 0:
8015 				switch (queue_id) {
8016 				case 0:
8017 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8018 					break;
8019 				case 1:
8020 					/* XXX compute */
8021 					break;
8022 				case 2:
8023 					/* XXX compute */
8024 					break;
8025 				}
8026 				break;
8027 			case 1:
8028 				switch (queue_id) {
8029 				case 0:
8030 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8031 					break;
8032 				case 1:
8033 					/* XXX compute */
8034 					break;
8035 				case 2:
8036 					/* XXX compute */
8037 					break;
8038 				}
8039 				break;
8040 			}
8041 			break;
8042 		case 230: /* thermal low to high */
8043 			DRM_DEBUG("IH: thermal low to high\n");
8044 			rdev->pm.dpm.thermal.high_to_low = false;
8045 			queue_thermal = true;
8046 			break;
8047 		case 231: /* thermal high to low */
8048 			DRM_DEBUG("IH: thermal high to low\n");
8049 			rdev->pm.dpm.thermal.high_to_low = true;
8050 			queue_thermal = true;
8051 			break;
8052 		case 233: /* GUI IDLE */
8053 			DRM_DEBUG("IH: GUI idle\n");
8054 			break;
8055 		case 241: /* SDMA Privileged inst */
8056 		case 247: /* SDMA Privileged inst */
8057 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8058 			/* XXX check the bitfield order! */
8059 			me_id = (ring_id & 0x3) >> 0;
8060 			queue_id = (ring_id & 0xc) >> 2;
8061 			switch (me_id) {
8062 			case 0:
8063 				switch (queue_id) {
8064 				case 0:
8065 					queue_reset = true;
8066 					break;
8067 				case 1:
8068 					/* XXX compute */
8069 					queue_reset = true;
8070 					break;
8071 				case 2:
8072 					/* XXX compute */
8073 					queue_reset = true;
8074 					break;
8075 				}
8076 				break;
8077 			case 1:
8078 				switch (queue_id) {
8079 				case 0:
8080 					queue_reset = true;
8081 					break;
8082 				case 1:
8083 					/* XXX compute */
8084 					queue_reset = true;
8085 					break;
8086 				case 2:
8087 					/* XXX compute */
8088 					queue_reset = true;
8089 					break;
8090 				}
8091 				break;
8092 			}
8093 			break;
8094 		default:
8095 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8096 			break;
8097 		}
8098 
8099 		/* wptr/rptr are in bytes! */
8100 		rptr += 16;
8101 		rptr &= rdev->ih.ptr_mask;
8102 		WREG32(IH_RB_RPTR, rptr);
8103 	}
8104 	if (queue_dp)
8105 		schedule_work(&rdev->dp_work);
8106 	if (queue_hotplug)
8107 		schedule_delayed_work(&rdev->hotplug_work, 0);
8108 	if (queue_reset) {
8109 		rdev->needs_reset = true;
8110 		wake_up_all(&rdev->fence_queue);
8111 	}
8112 	if (queue_thermal)
8113 		schedule_work(&rdev->pm.dpm.thermal.work);
8114 	rdev->ih.rptr = rptr;
8115 	atomic_set(&rdev->ih.lock, 0);
8116 
8117 	/* make sure wptr hasn't changed while processing */
8118 	wptr = cik_get_ih_wptr(rdev);
8119 	if (wptr != rptr)
8120 		goto restart_ih;
8121 
8122 	return IRQ_HANDLED;
8123 }
8124 
8125 /*
8126  * startup/shutdown callbacks
8127  */
8128 static void cik_uvd_init(struct radeon_device *rdev)
8129 {
8130 	int r;
8131 
8132 	if (!rdev->has_uvd)
8133 		return;
8134 
8135 	r = radeon_uvd_init(rdev);
8136 	if (r) {
8137 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8138 		/*
8139 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8140 		 * to early fails cik_uvd_start() and thus nothing happens
8141 		 * there. So it is pointless to try to go through that code
8142 		 * hence why we disable uvd here.
8143 		 */
8144 		rdev->has_uvd = 0;
8145 		return;
8146 	}
8147 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8148 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8149 }
8150 
8151 static void cik_uvd_start(struct radeon_device *rdev)
8152 {
8153 	int r;
8154 
8155 	if (!rdev->has_uvd)
8156 		return;
8157 
8158 	r = radeon_uvd_resume(rdev);
8159 	if (r) {
8160 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8161 		goto error;
8162 	}
8163 	r = uvd_v4_2_resume(rdev);
8164 	if (r) {
8165 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8166 		goto error;
8167 	}
8168 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8169 	if (r) {
8170 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8171 		goto error;
8172 	}
8173 	return;
8174 
8175 error:
8176 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8177 }
8178 
8179 static void cik_uvd_resume(struct radeon_device *rdev)
8180 {
8181 	struct radeon_ring *ring;
8182 	int r;
8183 
8184 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8185 		return;
8186 
8187 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8188 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8189 	if (r) {
8190 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8191 		return;
8192 	}
8193 	r = uvd_v1_0_init(rdev);
8194 	if (r) {
8195 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8196 		return;
8197 	}
8198 }
8199 
8200 static void cik_vce_init(struct radeon_device *rdev)
8201 {
8202 	int r;
8203 
8204 	if (!rdev->has_vce)
8205 		return;
8206 
8207 	r = radeon_vce_init(rdev);
8208 	if (r) {
8209 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8210 		/*
8211 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8212 		 * to early fails cik_vce_start() and thus nothing happens
8213 		 * there. So it is pointless to try to go through that code
8214 		 * hence why we disable vce here.
8215 		 */
8216 		rdev->has_vce = 0;
8217 		return;
8218 	}
8219 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8220 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8221 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8222 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8223 }
8224 
8225 static void cik_vce_start(struct radeon_device *rdev)
8226 {
8227 	int r;
8228 
8229 	if (!rdev->has_vce)
8230 		return;
8231 
8232 	r = radeon_vce_resume(rdev);
8233 	if (r) {
8234 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8235 		goto error;
8236 	}
8237 	r = vce_v2_0_resume(rdev);
8238 	if (r) {
8239 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8240 		goto error;
8241 	}
8242 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8243 	if (r) {
8244 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8245 		goto error;
8246 	}
8247 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8248 	if (r) {
8249 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8250 		goto error;
8251 	}
8252 	return;
8253 
8254 error:
8255 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8256 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8257 }
8258 
8259 static void cik_vce_resume(struct radeon_device *rdev)
8260 {
8261 	struct radeon_ring *ring;
8262 	int r;
8263 
8264 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8265 		return;
8266 
8267 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8268 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8269 	if (r) {
8270 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8271 		return;
8272 	}
8273 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8274 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8275 	if (r) {
8276 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8277 		return;
8278 	}
8279 	r = vce_v1_0_init(rdev);
8280 	if (r) {
8281 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8282 		return;
8283 	}
8284 }
8285 
8286 /**
8287  * cik_startup - program the asic to a functional state
8288  *
8289  * @rdev: radeon_device pointer
8290  *
8291  * Programs the asic to a functional state (CIK).
8292  * Called by cik_init() and cik_resume().
8293  * Returns 0 for success, error for failure.
8294  */
8295 static int cik_startup(struct radeon_device *rdev)
8296 {
8297 	struct radeon_ring *ring;
8298 	u32 nop;
8299 	int r;
8300 
8301 	/* enable pcie gen2/3 link */
8302 	cik_pcie_gen3_enable(rdev);
8303 	/* enable aspm */
8304 	cik_program_aspm(rdev);
8305 
8306 	/* scratch needs to be initialized before MC */
8307 	r = r600_vram_scratch_init(rdev);
8308 	if (r)
8309 		return r;
8310 
8311 	cik_mc_program(rdev);
8312 
8313 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8314 		r = ci_mc_load_microcode(rdev);
8315 		if (r) {
8316 			DRM_ERROR("Failed to load MC firmware!\n");
8317 			return r;
8318 		}
8319 	}
8320 
8321 	r = cik_pcie_gart_enable(rdev);
8322 	if (r)
8323 		return r;
8324 	cik_gpu_init(rdev);
8325 
8326 	/* allocate rlc buffers */
8327 	if (rdev->flags & RADEON_IS_IGP) {
8328 		if (rdev->family == CHIP_KAVERI) {
8329 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8330 			rdev->rlc.reg_list_size =
8331 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8332 		} else {
8333 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8334 			rdev->rlc.reg_list_size =
8335 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8336 		}
8337 	}
8338 	rdev->rlc.cs_data = ci_cs_data;
8339 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8340 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8341 	r = sumo_rlc_init(rdev);
8342 	if (r) {
8343 		DRM_ERROR("Failed to init rlc BOs!\n");
8344 		return r;
8345 	}
8346 
8347 	/* allocate wb buffer */
8348 	r = radeon_wb_init(rdev);
8349 	if (r)
8350 		return r;
8351 
8352 	/* allocate mec buffers */
8353 	r = cik_mec_init(rdev);
8354 	if (r) {
8355 		DRM_ERROR("Failed to init MEC BOs!\n");
8356 		return r;
8357 	}
8358 
8359 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8360 	if (r) {
8361 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8362 		return r;
8363 	}
8364 
8365 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8366 	if (r) {
8367 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8368 		return r;
8369 	}
8370 
8371 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8372 	if (r) {
8373 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8374 		return r;
8375 	}
8376 
8377 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8378 	if (r) {
8379 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8380 		return r;
8381 	}
8382 
8383 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8384 	if (r) {
8385 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8386 		return r;
8387 	}
8388 
8389 	cik_uvd_start(rdev);
8390 	cik_vce_start(rdev);
8391 
8392 	/* Enable IRQ */
8393 	if (!rdev->irq.installed) {
8394 		r = radeon_irq_kms_init(rdev);
8395 		if (r)
8396 			return r;
8397 	}
8398 
8399 	r = cik_irq_init(rdev);
8400 	if (r) {
8401 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8402 		radeon_irq_kms_fini(rdev);
8403 		return r;
8404 	}
8405 	cik_irq_set(rdev);
8406 
8407 	if (rdev->family == CHIP_HAWAII) {
8408 		if (rdev->new_fw)
8409 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8410 		else
8411 			nop = RADEON_CP_PACKET2;
8412 	} else {
8413 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8414 	}
8415 
8416 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8417 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8418 			     nop);
8419 	if (r)
8420 		return r;
8421 
8422 	/* set up the compute queues */
8423 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8424 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8425 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8426 			     nop);
8427 	if (r)
8428 		return r;
8429 	ring->me = 1; /* first MEC */
8430 	ring->pipe = 0; /* first pipe */
8431 	ring->queue = 0; /* first queue */
8432 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8433 
8434 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8435 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8436 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8437 			     nop);
8438 	if (r)
8439 		return r;
8440 	/* dGPU only have 1 MEC */
8441 	ring->me = 1; /* first MEC */
8442 	ring->pipe = 0; /* first pipe */
8443 	ring->queue = 1; /* second queue */
8444 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8445 
8446 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8447 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8448 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8449 	if (r)
8450 		return r;
8451 
8452 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8453 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8454 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8455 	if (r)
8456 		return r;
8457 
8458 	r = cik_cp_resume(rdev);
8459 	if (r)
8460 		return r;
8461 
8462 	r = cik_sdma_resume(rdev);
8463 	if (r)
8464 		return r;
8465 
8466 	cik_uvd_resume(rdev);
8467 	cik_vce_resume(rdev);
8468 
8469 	r = radeon_ib_pool_init(rdev);
8470 	if (r) {
8471 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8472 		return r;
8473 	}
8474 
8475 	r = radeon_vm_manager_init(rdev);
8476 	if (r) {
8477 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8478 		return r;
8479 	}
8480 
8481 	r = radeon_audio_init(rdev);
8482 	if (r)
8483 		return r;
8484 
8485 	return 0;
8486 }
8487 
8488 /**
8489  * cik_resume - resume the asic to a functional state
8490  *
8491  * @rdev: radeon_device pointer
8492  *
8493  * Programs the asic to a functional state (CIK).
8494  * Called at resume.
8495  * Returns 0 for success, error for failure.
8496  */
8497 int cik_resume(struct radeon_device *rdev)
8498 {
8499 	int r;
8500 
8501 	/* post card */
8502 	atom_asic_init(rdev->mode_info.atom_context);
8503 
8504 	/* init golden registers */
8505 	cik_init_golden_registers(rdev);
8506 
8507 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8508 		radeon_pm_resume(rdev);
8509 
8510 	rdev->accel_working = true;
8511 	r = cik_startup(rdev);
8512 	if (r) {
8513 		DRM_ERROR("cik startup failed on resume\n");
8514 		rdev->accel_working = false;
8515 		return r;
8516 	}
8517 
8518 	return r;
8519 
8520 }
8521 
8522 /**
8523  * cik_suspend - suspend the asic
8524  *
8525  * @rdev: radeon_device pointer
8526  *
8527  * Bring the chip into a state suitable for suspend (CIK).
8528  * Called at suspend.
8529  * Returns 0 for success.
8530  */
8531 int cik_suspend(struct radeon_device *rdev)
8532 {
8533 	radeon_pm_suspend(rdev);
8534 	radeon_audio_fini(rdev);
8535 	radeon_vm_manager_fini(rdev);
8536 	cik_cp_enable(rdev, false);
8537 	cik_sdma_enable(rdev, false);
8538 	if (rdev->has_uvd) {
8539 		uvd_v1_0_fini(rdev);
8540 		radeon_uvd_suspend(rdev);
8541 	}
8542 	if (rdev->has_vce)
8543 		radeon_vce_suspend(rdev);
8544 	cik_fini_pg(rdev);
8545 	cik_fini_cg(rdev);
8546 	cik_irq_suspend(rdev);
8547 	radeon_wb_disable(rdev);
8548 	cik_pcie_gart_disable(rdev);
8549 	return 0;
8550 }
8551 
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call the asic specific function. This should also allow
 * removing a bunch of callback functions such as vram_info.
 */
8558 /**
8559  * cik_init - asic specific driver and hw init
8560  *
8561  * @rdev: radeon_device pointer
8562  *
8563  * Setup asic specific driver variables and program the hw
8564  * to a functional state (CIK).
8565  * Called at driver startup.
8566  * Returns 0 for success, errors for failure.
8567  */
8568 int cik_init(struct radeon_device *rdev)
8569 {
8570 	struct radeon_ring *ring;
8571 	int r;
8572 
8573 	/* Read BIOS */
8574 	if (!radeon_get_bios(rdev)) {
8575 		if (ASIC_IS_AVIVO(rdev))
8576 			return -EINVAL;
8577 	}
8578 	/* Must be an ATOMBIOS */
8579 	if (!rdev->is_atom_bios) {
8580 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8581 		return -EINVAL;
8582 	}
8583 	r = radeon_atombios_init(rdev);
8584 	if (r)
8585 		return r;
8586 
8587 	/* Post card if necessary */
8588 	if (!radeon_card_posted(rdev)) {
8589 		if (!rdev->bios) {
8590 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8591 			return -EINVAL;
8592 		}
8593 		DRM_INFO("GPU not posted. posting now...\n");
8594 		atom_asic_init(rdev->mode_info.atom_context);
8595 	}
8596 	/* init golden registers */
8597 	cik_init_golden_registers(rdev);
8598 	/* Initialize scratch registers */
8599 	cik_scratch_init(rdev);
8600 	/* Initialize surface registers */
8601 	radeon_surface_init(rdev);
8602 	/* Initialize clocks */
8603 	radeon_get_clock_info(rdev->ddev);
8604 
8605 	/* Fence driver */
8606 	r = radeon_fence_driver_init(rdev);
8607 	if (r)
8608 		return r;
8609 
8610 	/* initialize memory controller */
8611 	r = cik_mc_init(rdev);
8612 	if (r)
8613 		return r;
8614 	/* Memory manager */
8615 	r = radeon_bo_init(rdev);
8616 	if (r)
8617 		return r;
8618 
8619 	if (rdev->flags & RADEON_IS_IGP) {
8620 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8621 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8622 			r = cik_init_microcode(rdev);
8623 			if (r) {
8624 				DRM_ERROR("Failed to load firmware!\n");
8625 				return r;
8626 			}
8627 		}
8628 	} else {
8629 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8630 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8631 		    !rdev->mc_fw) {
8632 			r = cik_init_microcode(rdev);
8633 			if (r) {
8634 				DRM_ERROR("Failed to load firmware!\n");
8635 				return r;
8636 			}
8637 		}
8638 	}
8639 
8640 	/* Initialize power management */
8641 	radeon_pm_init(rdev);
8642 
8643 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8644 	ring->ring_obj = NULL;
8645 	r600_ring_init(rdev, ring, 1024 * 1024);
8646 
8647 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8648 	ring->ring_obj = NULL;
8649 	r600_ring_init(rdev, ring, 1024 * 1024);
8650 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8651 	if (r)
8652 		return r;
8653 
8654 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8655 	ring->ring_obj = NULL;
8656 	r600_ring_init(rdev, ring, 1024 * 1024);
8657 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8658 	if (r)
8659 		return r;
8660 
8661 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8662 	ring->ring_obj = NULL;
8663 	r600_ring_init(rdev, ring, 256 * 1024);
8664 
8665 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8666 	ring->ring_obj = NULL;
8667 	r600_ring_init(rdev, ring, 256 * 1024);
8668 
8669 	cik_uvd_init(rdev);
8670 	cik_vce_init(rdev);
8671 
8672 	rdev->ih.ring_obj = NULL;
8673 	r600_ih_ring_init(rdev, 64 * 1024);
8674 
8675 	r = r600_pcie_gart_init(rdev);
8676 	if (r)
8677 		return r;
8678 
8679 	rdev->accel_working = true;
8680 	r = cik_startup(rdev);
8681 	if (r) {
8682 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8683 		cik_cp_fini(rdev);
8684 		cik_sdma_fini(rdev);
8685 		cik_irq_fini(rdev);
8686 		sumo_rlc_fini(rdev);
8687 		cik_mec_fini(rdev);
8688 		radeon_wb_fini(rdev);
8689 		radeon_ib_pool_fini(rdev);
8690 		radeon_vm_manager_fini(rdev);
8691 		radeon_irq_kms_fini(rdev);
8692 		cik_pcie_gart_fini(rdev);
8693 		rdev->accel_working = false;
8694 	}
8695 
8696 	/* Don't start up if the MC ucode is missing.
8697 	 * The default clocks and voltages before the MC ucode
8698 	 * is loaded are not suffient for advanced operations.
8699 	 */
8700 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8701 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8702 		return -EINVAL;
8703 	}
8704 
8705 	return 0;
8706 }
8707 
8708 /**
8709  * cik_fini - asic specific driver and hw fini
8710  *
8711  * @rdev: radeon_device pointer
8712  *
8713  * Tear down the asic specific driver variables and program the hw
8714  * to an idle state (CIK).
8715  * Called at driver unload.
8716  */
8717 void cik_fini(struct radeon_device *rdev)
8718 {
8719 	radeon_pm_fini(rdev);
8720 	cik_cp_fini(rdev);
8721 	cik_sdma_fini(rdev);
8722 	cik_fini_pg(rdev);
8723 	cik_fini_cg(rdev);
8724 	cik_irq_fini(rdev);
8725 	sumo_rlc_fini(rdev);
8726 	cik_mec_fini(rdev);
8727 	radeon_wb_fini(rdev);
8728 	radeon_vm_manager_fini(rdev);
8729 	radeon_ib_pool_fini(rdev);
8730 	radeon_irq_kms_fini(rdev);
8731 	uvd_v1_0_fini(rdev);
8732 	radeon_uvd_fini(rdev);
8733 	radeon_vce_fini(rdev);
8734 	cik_pcie_gart_fini(rdev);
8735 	r600_vram_scratch_fini(rdev);
8736 	radeon_gem_fini(rdev);
8737 	radeon_fence_driver_fini(rdev);
8738 	radeon_bo_fini(rdev);
8739 	radeon_atombios_fini(rdev);
8740 	kfree(rdev->bios);
8741 	rdev->bios = NULL;
8742 }
8743 
8744 void dce8_program_fmt(struct drm_encoder *encoder)
8745 {
8746 	struct drm_device *dev = encoder->dev;
8747 	struct radeon_device *rdev = dev->dev_private;
8748 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8749 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8750 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8751 	int bpc = 0;
8752 	u32 tmp = 0;
8753 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8754 
8755 	if (connector) {
8756 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8757 		bpc = radeon_get_monitor_bpc(connector);
8758 		dither = radeon_connector->dither;
8759 	}
8760 
8761 	/* LVDS/eDP FMT is set up by atom */
8762 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8763 		return;
8764 
8765 	/* not needed for analog */
8766 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8767 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8768 		return;
8769 
8770 	if (bpc == 0)
8771 		return;
8772 
8773 	switch (bpc) {
8774 	case 6:
8775 		if (dither == RADEON_FMT_DITHER_ENABLE)
8776 			/* XXX sort out optimal dither settings */
8777 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8778 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8779 		else
8780 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8781 		break;
8782 	case 8:
8783 		if (dither == RADEON_FMT_DITHER_ENABLE)
8784 			/* XXX sort out optimal dither settings */
8785 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8786 				FMT_RGB_RANDOM_ENABLE |
8787 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8788 		else
8789 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8790 		break;
8791 	case 10:
8792 		if (dither == RADEON_FMT_DITHER_ENABLE)
8793 			/* XXX sort out optimal dither settings */
8794 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8795 				FMT_RGB_RANDOM_ENABLE |
8796 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8797 		else
8798 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8799 		break;
8800 	default:
8801 		/* not needed */
8802 		break;
8803 	}
8804 
8805 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8806 }
8807 
8808 /* display watermark setup */
8809 /**
8810  * dce8_line_buffer_adjust - Set up the line buffer
8811  *
8812  * @rdev: radeon_device pointer
8813  * @radeon_crtc: the selected display controller
8814  * @mode: the current display mode on the selected display
8815  * controller
8816  *
8817  * Setup up the line buffer allocation for
8818  * the selected display controller (CIK).
8819  * Returns the line buffer size in pixels.
8820  */
8821 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8822 				   struct radeon_crtc *radeon_crtc,
8823 				   struct drm_display_mode *mode)
8824 {
8825 	u32 tmp, buffer_alloc, i;
8826 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8827 	/*
8828 	 * Line Buffer Setup
8829 	 * There are 6 line buffers, one for each display controllers.
8830 	 * There are 3 partitions per LB. Select the number of partitions
8831 	 * to enable based on the display width.  For display widths larger
8832 	 * than 4096, you need use to use 2 display controllers and combine
8833 	 * them using the stereo blender.
8834 	 */
8835 	if (radeon_crtc->base.enabled && mode) {
8836 		if (mode->crtc_hdisplay < 1920) {
8837 			tmp = 1;
8838 			buffer_alloc = 2;
8839 		} else if (mode->crtc_hdisplay < 2560) {
8840 			tmp = 2;
8841 			buffer_alloc = 2;
8842 		} else if (mode->crtc_hdisplay < 4096) {
8843 			tmp = 0;
8844 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8845 		} else {
8846 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8847 			tmp = 0;
8848 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8849 		}
8850 	} else {
8851 		tmp = 1;
8852 		buffer_alloc = 0;
8853 	}
8854 
8855 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8856 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8857 
8858 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8859 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8860 	for (i = 0; i < rdev->usec_timeout; i++) {
8861 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8862 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8863 			break;
8864 		udelay(1);
8865 	}
8866 
8867 	if (radeon_crtc->base.enabled && mode) {
8868 		switch (tmp) {
8869 		case 0:
8870 		default:
8871 			return 4096 * 2;
8872 		case 1:
8873 			return 1920 * 2;
8874 		case 2:
8875 			return 2560 * 2;
8876 		}
8877 	}
8878 
8879 	/* controller not enabled, so no lb used */
8880 	return 0;
8881 }
8882 
8883 /**
8884  * cik_get_number_of_dram_channels - get the number of dram channels
8885  *
8886  * @rdev: radeon_device pointer
8887  *
8888  * Look up the number of video ram channels (CIK).
8889  * Used for display watermark bandwidth calculations
8890  * Returns the number of dram channels
8891  */
8892 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8893 {
8894 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8895 
8896 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8897 	case 0:
8898 	default:
8899 		return 1;
8900 	case 1:
8901 		return 2;
8902 	case 2:
8903 		return 4;
8904 	case 3:
8905 		return 8;
8906 	case 4:
8907 		return 3;
8908 	case 5:
8909 		return 6;
8910 	case 6:
8911 		return 10;
8912 	case 7:
8913 		return 12;
8914 	case 8:
8915 		return 16;
8916 	}
8917 }
8918 
8919 struct dce8_wm_params {
8920 	u32 dram_channels; /* number of dram channels */
8921 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8922 	u32 sclk;          /* engine clock in kHz */
8923 	u32 disp_clk;      /* display clock in kHz */
8924 	u32 src_width;     /* viewport width */
8925 	u32 active_time;   /* active display time in ns */
8926 	u32 blank_time;    /* blank time in ns */
8927 	bool interlaced;    /* mode is interlaced */
8928 	fixed20_12 vsc;    /* vertical scale ratio */
8929 	u32 num_heads;     /* number of active crtcs */
8930 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8931 	u32 lb_size;       /* line buffer allocated to pipe */
8932 	u32 vtaps;         /* vertical scaler taps */
8933 };
8934 
8935 /**
8936  * dce8_dram_bandwidth - get the dram bandwidth
8937  *
8938  * @wm: watermark calculation data
8939  *
8940  * Calculate the raw dram bandwidth (CIK).
8941  * Used for display watermark bandwidth calculations
8942  * Returns the dram bandwidth in MBytes/s
8943  */
8944 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8945 {
8946 	/* Calculate raw DRAM Bandwidth */
8947 	fixed20_12 dram_efficiency; /* 0.7 */
8948 	fixed20_12 yclk, dram_channels, bandwidth;
8949 	fixed20_12 a;
8950 
8951 	a.full = dfixed_const(1000);
8952 	yclk.full = dfixed_const(wm->yclk);
8953 	yclk.full = dfixed_div(yclk, a);
8954 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8955 	a.full = dfixed_const(10);
8956 	dram_efficiency.full = dfixed_const(7);
8957 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8958 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8959 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8960 
8961 	return dfixed_trunc(bandwidth);
8962 }
8963 
8964 /**
8965  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8966  *
8967  * @wm: watermark calculation data
8968  *
8969  * Calculate the dram bandwidth used for display (CIK).
8970  * Used for display watermark bandwidth calculations
8971  * Returns the dram bandwidth for display in MBytes/s
8972  */
8973 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8974 {
8975 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8976 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8977 	fixed20_12 yclk, dram_channels, bandwidth;
8978 	fixed20_12 a;
8979 
8980 	a.full = dfixed_const(1000);
8981 	yclk.full = dfixed_const(wm->yclk);
8982 	yclk.full = dfixed_div(yclk, a);
8983 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8984 	a.full = dfixed_const(10);
8985 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8986 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8987 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8988 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8989 
8990 	return dfixed_trunc(bandwidth);
8991 }
8992 
8993 /**
8994  * dce8_data_return_bandwidth - get the data return bandwidth
8995  *
8996  * @wm: watermark calculation data
8997  *
8998  * Calculate the data return bandwidth used for display (CIK).
8999  * Used for display watermark bandwidth calculations
9000  * Returns the data return bandwidth in MBytes/s
9001  */
9002 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9003 {
9004 	/* Calculate the display Data return Bandwidth */
9005 	fixed20_12 return_efficiency; /* 0.8 */
9006 	fixed20_12 sclk, bandwidth;
9007 	fixed20_12 a;
9008 
9009 	a.full = dfixed_const(1000);
9010 	sclk.full = dfixed_const(wm->sclk);
9011 	sclk.full = dfixed_div(sclk, a);
9012 	a.full = dfixed_const(10);
9013 	return_efficiency.full = dfixed_const(8);
9014 	return_efficiency.full = dfixed_div(return_efficiency, a);
9015 	a.full = dfixed_const(32);
9016 	bandwidth.full = dfixed_mul(a, sclk);
9017 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9018 
9019 	return dfixed_trunc(bandwidth);
9020 }
9021 
9022 /**
9023  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9024  *
9025  * @wm: watermark calculation data
9026  *
9027  * Calculate the dmif bandwidth used for display (CIK).
9028  * Used for display watermark bandwidth calculations
9029  * Returns the dmif bandwidth in MBytes/s
9030  */
9031 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9032 {
9033 	/* Calculate the DMIF Request Bandwidth */
9034 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9035 	fixed20_12 disp_clk, bandwidth;
9036 	fixed20_12 a, b;
9037 
9038 	a.full = dfixed_const(1000);
9039 	disp_clk.full = dfixed_const(wm->disp_clk);
9040 	disp_clk.full = dfixed_div(disp_clk, a);
9041 	a.full = dfixed_const(32);
9042 	b.full = dfixed_mul(a, disp_clk);
9043 
9044 	a.full = dfixed_const(10);
9045 	disp_clk_request_efficiency.full = dfixed_const(8);
9046 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9047 
9048 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9049 
9050 	return dfixed_trunc(bandwidth);
9051 }
9052 
9053 /**
9054  * dce8_available_bandwidth - get the min available bandwidth
9055  *
9056  * @wm: watermark calculation data
9057  *
9058  * Calculate the min available bandwidth used for display (CIK).
9059  * Used for display watermark bandwidth calculations
9060  * Returns the min available bandwidth in MBytes/s
9061  */
9062 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9063 {
9064 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9065 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9066 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9067 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9068 
9069 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9070 }
9071 
9072 /**
9073  * dce8_average_bandwidth - get the average available bandwidth
9074  *
9075  * @wm: watermark calculation data
9076  *
9077  * Calculate the average available bandwidth used for display (CIK).
9078  * Used for display watermark bandwidth calculations
9079  * Returns the average available bandwidth in MBytes/s
9080  */
9081 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9082 {
9083 	/* Calculate the display mode Average Bandwidth
9084 	 * DisplayMode should contain the source and destination dimensions,
9085 	 * timing, etc.
9086 	 */
9087 	fixed20_12 bpp;
9088 	fixed20_12 line_time;
9089 	fixed20_12 src_width;
9090 	fixed20_12 bandwidth;
9091 	fixed20_12 a;
9092 
9093 	a.full = dfixed_const(1000);
9094 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9095 	line_time.full = dfixed_div(line_time, a);
9096 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9097 	src_width.full = dfixed_const(wm->src_width);
9098 	bandwidth.full = dfixed_mul(src_width, bpp);
9099 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9100 	bandwidth.full = dfixed_div(bandwidth, line_time);
9101 
9102 	return dfixed_trunc(bandwidth);
9103 }
9104 
9105 /**
9106  * dce8_latency_watermark - get the latency watermark
9107  *
9108  * @wm: watermark calculation data
9109  *
9110  * Calculate the latency watermark (CIK).
9111  * Used for display watermark bandwidth calculations
9112  * Returns the latency watermark in ns
9113  */
9114 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9115 {
9116 	/* First calculate the latency in ns */
9117 	u32 mc_latency = 2000; /* 2000 ns. */
9118 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9119 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9120 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9121 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9122 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9123 		(wm->num_heads * cursor_line_pair_return_time);
9124 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9125 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9126 	u32 tmp, dmif_size = 12288;
9127 	fixed20_12 a, b, c;
9128 
9129 	if (wm->num_heads == 0)
9130 		return 0;
9131 
9132 	a.full = dfixed_const(2);
9133 	b.full = dfixed_const(1);
9134 	if ((wm->vsc.full > a.full) ||
9135 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9136 	    (wm->vtaps >= 5) ||
9137 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9138 		max_src_lines_per_dst_line = 4;
9139 	else
9140 		max_src_lines_per_dst_line = 2;
9141 
9142 	a.full = dfixed_const(available_bandwidth);
9143 	b.full = dfixed_const(wm->num_heads);
9144 	a.full = dfixed_div(a, b);
9145 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9146 	tmp = min(dfixed_trunc(a), tmp);
9147 
9148 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9149 
9150 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9151 	b.full = dfixed_const(1000);
9152 	c.full = dfixed_const(lb_fill_bw);
9153 	b.full = dfixed_div(c, b);
9154 	a.full = dfixed_div(a, b);
9155 	line_fill_time = dfixed_trunc(a);
9156 
9157 	if (line_fill_time < wm->active_time)
9158 		return latency;
9159 	else
9160 		return latency + (line_fill_time - wm->active_time);
9161 
9162 }
9163 
9164 /**
9165  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9166  * average and available dram bandwidth
9167  *
9168  * @wm: watermark calculation data
9169  *
9170  * Check if the display average bandwidth fits in the display
9171  * dram bandwidth (CIK).
9172  * Used for display watermark bandwidth calculations
9173  * Returns true if the display fits, false if not.
9174  */
9175 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9176 {
9177 	if (dce8_average_bandwidth(wm) <=
9178 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9179 		return true;
9180 	else
9181 		return false;
9182 }
9183 
9184 /**
9185  * dce8_average_bandwidth_vs_available_bandwidth - check
9186  * average and available bandwidth
9187  *
9188  * @wm: watermark calculation data
9189  *
9190  * Check if the display average bandwidth fits in the display
9191  * available bandwidth (CIK).
9192  * Used for display watermark bandwidth calculations
9193  * Returns true if the display fits, false if not.
9194  */
9195 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9196 {
9197 	if (dce8_average_bandwidth(wm) <=
9198 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9199 		return true;
9200 	else
9201 		return false;
9202 }
9203 
9204 /**
9205  * dce8_check_latency_hiding - check latency hiding
9206  *
9207  * @wm: watermark calculation data
9208  *
9209  * Check latency hiding (CIK).
9210  * Used for display watermark bandwidth calculations
9211  * Returns true if the display fits, false if not.
9212  */
9213 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9214 {
9215 	u32 lb_partitions = wm->lb_size / wm->src_width;
9216 	u32 line_time = wm->active_time + wm->blank_time;
9217 	u32 latency_tolerant_lines;
9218 	u32 latency_hiding;
9219 	fixed20_12 a;
9220 
9221 	a.full = dfixed_const(1);
9222 	if (wm->vsc.full > a.full)
9223 		latency_tolerant_lines = 1;
9224 	else {
9225 		if (lb_partitions <= (wm->vtaps + 1))
9226 			latency_tolerant_lines = 1;
9227 		else
9228 			latency_tolerant_lines = 2;
9229 	}
9230 
9231 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9232 
9233 	if (dce8_latency_watermark(wm) <= latency_hiding)
9234 		return true;
9235 	else
9236 		return false;
9237 }
9238 
9239 /**
9240  * dce8_program_watermarks - program display watermarks
9241  *
9242  * @rdev: radeon_device pointer
9243  * @radeon_crtc: the selected display controller
9244  * @lb_size: line buffer size
9245  * @num_heads: number of display controllers in use
9246  *
9247  * Calculate and program the display watermarks for the
9248  * selected display controller (CIK).
9249  */
9250 static void dce8_program_watermarks(struct radeon_device *rdev,
9251 				    struct radeon_crtc *radeon_crtc,
9252 				    u32 lb_size, u32 num_heads)
9253 {
9254 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9255 	struct dce8_wm_params wm_low, wm_high;
9256 	u32 active_time;
9257 	u32 line_time = 0;
9258 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9259 	u32 tmp, wm_mask;
9260 
9261 	if (radeon_crtc->base.enabled && num_heads && mode) {
9262 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9263 					    (u32)mode->clock);
9264 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9265 					  (u32)mode->clock);
9266 		line_time = min(line_time, (u32)65535);
9267 
9268 		/* watermark for high clocks */
9269 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9270 		    rdev->pm.dpm_enabled) {
9271 			wm_high.yclk =
9272 				radeon_dpm_get_mclk(rdev, false) * 10;
9273 			wm_high.sclk =
9274 				radeon_dpm_get_sclk(rdev, false) * 10;
9275 		} else {
9276 			wm_high.yclk = rdev->pm.current_mclk * 10;
9277 			wm_high.sclk = rdev->pm.current_sclk * 10;
9278 		}
9279 
9280 		wm_high.disp_clk = mode->clock;
9281 		wm_high.src_width = mode->crtc_hdisplay;
9282 		wm_high.active_time = active_time;
9283 		wm_high.blank_time = line_time - wm_high.active_time;
9284 		wm_high.interlaced = false;
9285 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9286 			wm_high.interlaced = true;
9287 		wm_high.vsc = radeon_crtc->vsc;
9288 		wm_high.vtaps = 1;
9289 		if (radeon_crtc->rmx_type != RMX_OFF)
9290 			wm_high.vtaps = 2;
9291 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9292 		wm_high.lb_size = lb_size;
9293 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9294 		wm_high.num_heads = num_heads;
9295 
9296 		/* set for high clocks */
9297 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9298 
9299 		/* possibly force display priority to high */
9300 		/* should really do this at mode validation time... */
9301 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9302 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9303 		    !dce8_check_latency_hiding(&wm_high) ||
9304 		    (rdev->disp_priority == 2)) {
9305 			DRM_DEBUG_KMS("force priority to high\n");
9306 		}
9307 
9308 		/* watermark for low clocks */
9309 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9310 		    rdev->pm.dpm_enabled) {
9311 			wm_low.yclk =
9312 				radeon_dpm_get_mclk(rdev, true) * 10;
9313 			wm_low.sclk =
9314 				radeon_dpm_get_sclk(rdev, true) * 10;
9315 		} else {
9316 			wm_low.yclk = rdev->pm.current_mclk * 10;
9317 			wm_low.sclk = rdev->pm.current_sclk * 10;
9318 		}
9319 
9320 		wm_low.disp_clk = mode->clock;
9321 		wm_low.src_width = mode->crtc_hdisplay;
9322 		wm_low.active_time = active_time;
9323 		wm_low.blank_time = line_time - wm_low.active_time;
9324 		wm_low.interlaced = false;
9325 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9326 			wm_low.interlaced = true;
9327 		wm_low.vsc = radeon_crtc->vsc;
9328 		wm_low.vtaps = 1;
9329 		if (radeon_crtc->rmx_type != RMX_OFF)
9330 			wm_low.vtaps = 2;
9331 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9332 		wm_low.lb_size = lb_size;
9333 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9334 		wm_low.num_heads = num_heads;
9335 
9336 		/* set for low clocks */
9337 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9338 
9339 		/* possibly force display priority to high */
9340 		/* should really do this at mode validation time... */
9341 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9342 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9343 		    !dce8_check_latency_hiding(&wm_low) ||
9344 		    (rdev->disp_priority == 2)) {
9345 			DRM_DEBUG_KMS("force priority to high\n");
9346 		}
9347 
9348 		/* Save number of lines the linebuffer leads before the scanout */
9349 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9350 	}
9351 
9352 	/* select wm A */
9353 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9354 	tmp = wm_mask;
9355 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9356 	tmp |= LATENCY_WATERMARK_MASK(1);
9357 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9358 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9359 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9360 		LATENCY_HIGH_WATERMARK(line_time)));
9361 	/* select wm B */
9362 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9363 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9364 	tmp |= LATENCY_WATERMARK_MASK(2);
9365 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9366 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9367 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9368 		LATENCY_HIGH_WATERMARK(line_time)));
9369 	/* restore original selection */
9370 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9371 
9372 	/* save values for DPM */
9373 	radeon_crtc->line_time = line_time;
9374 	radeon_crtc->wm_high = latency_watermark_a;
9375 	radeon_crtc->wm_low = latency_watermark_b;
9376 }
9377 
9378 /**
9379  * dce8_bandwidth_update - program display watermarks
9380  *
9381  * @rdev: radeon_device pointer
9382  *
9383  * Calculate and program the display watermarks and line
9384  * buffer allocation (CIK).
9385  */
9386 void dce8_bandwidth_update(struct radeon_device *rdev)
9387 {
9388 	struct drm_display_mode *mode = NULL;
9389 	u32 num_heads = 0, lb_size;
9390 	int i;
9391 
9392 	if (!rdev->mode_info.mode_config_initialized)
9393 		return;
9394 
9395 	radeon_update_display_priority(rdev);
9396 
9397 	for (i = 0; i < rdev->num_crtc; i++) {
9398 		if (rdev->mode_info.crtcs[i]->base.enabled)
9399 			num_heads++;
9400 	}
9401 	for (i = 0; i < rdev->num_crtc; i++) {
9402 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9403 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9404 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9405 	}
9406 }
9407 
9408 /**
9409  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9410  *
9411  * @rdev: radeon_device pointer
9412  *
9413  * Fetches a GPU clock counter snapshot (SI).
9414  * Returns the 64 bit clock counter snapshot.
9415  */
9416 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9417 {
9418 	uint64_t clock;
9419 
9420 	mutex_lock(&rdev->gpu_clock_mutex);
9421 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9422 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9423 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9424 	mutex_unlock(&rdev->gpu_clock_mutex);
9425 	return clock;
9426 }
9427 
9428 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9429 			     u32 cntl_reg, u32 status_reg)
9430 {
9431 	int r, i;
9432 	struct atom_clock_dividers dividers;
9433 	uint32_t tmp;
9434 
9435 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9436 					   clock, false, &dividers);
9437 	if (r)
9438 		return r;
9439 
9440 	tmp = RREG32_SMC(cntl_reg);
9441 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9442 	tmp |= dividers.post_divider;
9443 	WREG32_SMC(cntl_reg, tmp);
9444 
9445 	for (i = 0; i < 100; i++) {
9446 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9447 			break;
9448 		mdelay(10);
9449 	}
9450 	if (i == 100)
9451 		return -ETIMEDOUT;
9452 
9453 	return 0;
9454 }
9455 
9456 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9457 {
9458 	int r = 0;
9459 
9460 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9461 	if (r)
9462 		return r;
9463 
9464 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9465 	return r;
9466 }
9467 
9468 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9469 {
9470 	int r, i;
9471 	struct atom_clock_dividers dividers;
9472 	u32 tmp;
9473 
9474 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9475 					   ecclk, false, &dividers);
9476 	if (r)
9477 		return r;
9478 
9479 	for (i = 0; i < 100; i++) {
9480 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9481 			break;
9482 		mdelay(10);
9483 	}
9484 	if (i == 100)
9485 		return -ETIMEDOUT;
9486 
9487 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9488 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9489 	tmp |= dividers.post_divider;
9490 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9491 
9492 	for (i = 0; i < 100; i++) {
9493 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9494 			break;
9495 		mdelay(10);
9496 	}
9497 	if (i == 100)
9498 		return -ETIMEDOUT;
9499 
9500 	return 0;
9501 }
9502 
9503 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9504 {
9505 	struct pci_dev *root = rdev->pdev->bus->self;
9506 	enum pci_bus_speed speed_cap;
9507 	int bridge_pos, gpu_pos;
9508 	u32 speed_cntl, current_data_rate;
9509 	int i;
9510 	u16 tmp16;
9511 
9512 	if (pci_is_root_bus(rdev->pdev->bus))
9513 		return;
9514 
9515 	if (radeon_pcie_gen2 == 0)
9516 		return;
9517 
9518 	if (rdev->flags & RADEON_IS_IGP)
9519 		return;
9520 
9521 	if (!(rdev->flags & RADEON_IS_PCIE))
9522 		return;
9523 
9524 	speed_cap = pcie_get_speed_cap(root);
9525 	if (speed_cap == PCI_SPEED_UNKNOWN)
9526 		return;
9527 
9528 	if ((speed_cap != PCIE_SPEED_8_0GT) &&
9529 	    (speed_cap != PCIE_SPEED_5_0GT))
9530 		return;
9531 
9532 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9533 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9534 		LC_CURRENT_DATA_RATE_SHIFT;
9535 	if (speed_cap == PCIE_SPEED_8_0GT) {
9536 		if (current_data_rate == 2) {
9537 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9538 			return;
9539 		}
9540 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9541 	} else if (speed_cap == PCIE_SPEED_5_0GT) {
9542 		if (current_data_rate == 1) {
9543 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9544 			return;
9545 		}
9546 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9547 	}
9548 
9549 	bridge_pos = pci_pcie_cap(root);
9550 	if (!bridge_pos)
9551 		return;
9552 
9553 	gpu_pos = pci_pcie_cap(rdev->pdev);
9554 	if (!gpu_pos)
9555 		return;
9556 
9557 	if (speed_cap == PCIE_SPEED_8_0GT) {
9558 		/* re-try equalization if gen3 is not already enabled */
9559 		if (current_data_rate != 2) {
9560 			u16 bridge_cfg, gpu_cfg;
9561 			u16 bridge_cfg2, gpu_cfg2;
9562 			u32 max_lw, current_lw, tmp;
9563 
9564 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9565 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9566 
9567 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9568 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9569 
9570 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9571 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9572 
9573 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9574 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9575 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9576 
9577 			if (current_lw < max_lw) {
9578 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9579 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9580 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9581 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9582 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9583 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9584 				}
9585 			}
9586 
9587 			for (i = 0; i < 10; i++) {
9588 				/* check status */
9589 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9590 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9591 					break;
9592 
9593 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9594 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9595 
9596 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9597 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9598 
9599 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9600 				tmp |= LC_SET_QUIESCE;
9601 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9602 
9603 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9604 				tmp |= LC_REDO_EQ;
9605 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9606 
9607 				msleep(100);
9608 
9609 				/* linkctl */
9610 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9611 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9612 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9613 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9614 
9615 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9616 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9617 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9618 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9619 
9620 				/* linkctl2 */
9621 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9622 				tmp16 &= ~((1 << 4) | (7 << 9));
9623 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9624 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9625 
9626 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9627 				tmp16 &= ~((1 << 4) | (7 << 9));
9628 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9629 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9630 
9631 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9632 				tmp &= ~LC_SET_QUIESCE;
9633 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9634 			}
9635 		}
9636 	}
9637 
9638 	/* set the link speed */
9639 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9640 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9641 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9642 
9643 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9644 	tmp16 &= ~0xf;
9645 	if (speed_cap == PCIE_SPEED_8_0GT)
9646 		tmp16 |= 3; /* gen3 */
9647 	else if (speed_cap == PCIE_SPEED_5_0GT)
9648 		tmp16 |= 2; /* gen2 */
9649 	else
9650 		tmp16 |= 1; /* gen1 */
9651 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9652 
9653 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9654 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9655 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9656 
9657 	for (i = 0; i < rdev->usec_timeout; i++) {
9658 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9659 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9660 			break;
9661 		udelay(1);
9662 	}
9663 }
9664 
9665 static void cik_program_aspm(struct radeon_device *rdev)
9666 {
9667 	u32 data, orig;
9668 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9669 	bool disable_clkreq = false;
9670 
9671 	if (radeon_aspm == 0)
9672 		return;
9673 
9674 	/* XXX double check IGPs */
9675 	if (rdev->flags & RADEON_IS_IGP)
9676 		return;
9677 
9678 	if (!(rdev->flags & RADEON_IS_PCIE))
9679 		return;
9680 
9681 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9682 	data &= ~LC_XMIT_N_FTS_MASK;
9683 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9684 	if (orig != data)
9685 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9686 
9687 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9688 	data |= LC_GO_TO_RECOVERY;
9689 	if (orig != data)
9690 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9691 
9692 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9693 	data |= P_IGNORE_EDB_ERR;
9694 	if (orig != data)
9695 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9696 
9697 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9698 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9699 	data |= LC_PMI_TO_L1_DIS;
9700 	if (!disable_l0s)
9701 		data |= LC_L0S_INACTIVITY(7);
9702 
9703 	if (!disable_l1) {
9704 		data |= LC_L1_INACTIVITY(7);
9705 		data &= ~LC_PMI_TO_L1_DIS;
9706 		if (orig != data)
9707 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9708 
9709 		if (!disable_plloff_in_l1) {
9710 			bool clk_req_support;
9711 
9712 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9713 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9714 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9715 			if (orig != data)
9716 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9717 
9718 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9719 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9720 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9721 			if (orig != data)
9722 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9723 
9724 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9725 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9726 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9727 			if (orig != data)
9728 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9729 
9730 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9731 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9732 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9733 			if (orig != data)
9734 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9735 
9736 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9737 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9738 			data |= LC_DYN_LANES_PWR_STATE(3);
9739 			if (orig != data)
9740 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9741 
9742 			if (!disable_clkreq &&
9743 			    !pci_is_root_bus(rdev->pdev->bus)) {
9744 				struct pci_dev *root = rdev->pdev->bus->self;
9745 				u32 lnkcap;
9746 
9747 				clk_req_support = false;
9748 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9749 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9750 					clk_req_support = true;
9751 			} else {
9752 				clk_req_support = false;
9753 			}
9754 
9755 			if (clk_req_support) {
9756 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9757 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9758 				if (orig != data)
9759 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9760 
9761 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9762 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9763 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9764 				if (orig != data)
9765 					WREG32_SMC(THM_CLK_CNTL, data);
9766 
9767 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9768 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9769 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9770 				if (orig != data)
9771 					WREG32_SMC(MISC_CLK_CTRL, data);
9772 
9773 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9774 				data &= ~BCLK_AS_XCLK;
9775 				if (orig != data)
9776 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9777 
9778 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9779 				data &= ~FORCE_BIF_REFCLK_EN;
9780 				if (orig != data)
9781 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9782 
9783 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9784 				data &= ~MPLL_CLKOUT_SEL_MASK;
9785 				data |= MPLL_CLKOUT_SEL(4);
9786 				if (orig != data)
9787 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9788 			}
9789 		}
9790 	} else {
9791 		if (orig != data)
9792 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9793 	}
9794 
9795 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9796 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9797 	if (orig != data)
9798 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9799 
9800 	if (!disable_l0s) {
9801 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9802 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9803 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9804 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9805 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9806 				data &= ~LC_L0S_INACTIVITY_MASK;
9807 				if (orig != data)
9808 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9809 			}
9810 		}
9811 	}
9812 }
9813