xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision bf070bb0)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 
37 #define SH_MEM_CONFIG_GFX_DEFAULT \
38 	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
39 
40 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
49 
50 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
51 MODULE_FIRMWARE("radeon/bonaire_me.bin");
52 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
53 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
54 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
55 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
56 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
57 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
58 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
59 
60 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
69 
70 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
71 MODULE_FIRMWARE("radeon/hawaii_me.bin");
72 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
73 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
74 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
75 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
76 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
77 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
78 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
79 
80 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
82 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
83 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
84 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
86 
87 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
88 MODULE_FIRMWARE("radeon/kaveri_me.bin");
89 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
90 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
91 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
92 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
93 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
94 
95 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
96 MODULE_FIRMWARE("radeon/KABINI_me.bin");
97 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
98 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
99 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
100 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
101 
102 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
103 MODULE_FIRMWARE("radeon/kabini_me.bin");
104 MODULE_FIRMWARE("radeon/kabini_ce.bin");
105 MODULE_FIRMWARE("radeon/kabini_mec.bin");
106 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
107 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
108 
109 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
111 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
112 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
113 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
115 
116 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
117 MODULE_FIRMWARE("radeon/mullins_me.bin");
118 MODULE_FIRMWARE("radeon/mullins_ce.bin");
119 MODULE_FIRMWARE("radeon/mullins_mec.bin");
120 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
121 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
122 
123 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
124 extern void r600_ih_ring_fini(struct radeon_device *rdev);
125 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
126 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
127 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
128 extern void sumo_rlc_fini(struct radeon_device *rdev);
129 extern int sumo_rlc_init(struct radeon_device *rdev);
130 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
131 extern void si_rlc_reset(struct radeon_device *rdev);
132 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
133 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
134 extern int cik_sdma_resume(struct radeon_device *rdev);
135 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
136 extern void cik_sdma_fini(struct radeon_device *rdev);
137 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
138 static void cik_rlc_stop(struct radeon_device *rdev);
139 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
140 static void cik_program_aspm(struct radeon_device *rdev);
141 static void cik_init_pg(struct radeon_device *rdev);
142 static void cik_init_cg(struct radeon_device *rdev);
143 static void cik_fini_pg(struct radeon_device *rdev);
144 static void cik_fini_cg(struct radeon_device *rdev);
145 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
146 					  bool enable);
147 
148 /**
149  * cik_get_allowed_info_register - fetch the register for the info ioctl
150  *
151  * @rdev: radeon_device pointer
152  * @reg: register offset in bytes
153  * @val: register value
154  *
155  * Returns 0 for success or -EINVAL for an invalid register
156  *
157  */
158 int cik_get_allowed_info_register(struct radeon_device *rdev,
159 				  u32 reg, u32 *val)
160 {
161 	switch (reg) {
162 	case GRBM_STATUS:
163 	case GRBM_STATUS2:
164 	case GRBM_STATUS_SE0:
165 	case GRBM_STATUS_SE1:
166 	case GRBM_STATUS_SE2:
167 	case GRBM_STATUS_SE3:
168 	case SRBM_STATUS:
169 	case SRBM_STATUS2:
170 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
171 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
172 	case UVD_STATUS:
173 	/* TODO VCE */
174 		*val = RREG32(reg);
175 		return 0;
176 	default:
177 		return -EINVAL;
178 	}
179 }
180 
181 /*
182  * Indirect registers accessor
183  */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	/* The DIDT block is reached through an index/data register pair;
	 * the spinlock keeps the index-write -> data-read sequence atomic
	 * with respect to other users of the pair. */
	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}
195 
void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	/* Indirect write through the DIDT index/data pair; the spinlock
	 * keeps the two MMIO writes atomic against concurrent accessors. */
	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
205 
206 /* get temperature in millidegrees */
207 int ci_get_temp(struct radeon_device *rdev)
208 {
209 	u32 temp;
210 	int actual_temp = 0;
211 
212 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
213 		CTF_TEMP_SHIFT;
214 
215 	if (temp & 0x200)
216 		actual_temp = 255;
217 	else
218 		actual_temp = temp & 0x1ff;
219 
220 	actual_temp = actual_temp * 1000;
221 
222 	return actual_temp;
223 }
224 
225 /* get temperature in millidegrees */
226 int kv_get_temp(struct radeon_device *rdev)
227 {
228 	u32 temp;
229 	int actual_temp = 0;
230 
231 	temp = RREG32_SMC(0xC0300E0C);
232 
233 	if (temp)
234 		actual_temp = (temp / 8) - 49;
235 	else
236 		actual_temp = 0;
237 
238 	actual_temp = actual_temp * 1000;
239 
240 	return actual_temp;
241 }
242 
243 /*
244  * Indirect registers accessor
245  */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	/* Indirect read through the PCIE index/data pair, serialized by
	 * the spinlock. */
	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	/* read the index back to make sure the posted write has landed
	 * before touching the data register */
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
258 
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	/* Indirect write through the PCIE index/data pair, serialized by
	 * the spinlock. */
	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	/* read back to flush the posted index write before the data write */
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	/* read back to flush the posted data write before dropping the lock */
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
270 
/*
 * RLC save/restore register list for Spectre (Kaveri) GFX.
 * Entries come in pairs: the first word packs a GRBM_GFX_INDEX selector in
 * its upper 16 bits with the register dword offset (byte offset >> 2) in the
 * lower 16 bits; the second word is a placeholder value slot.
 * NOTE(review): the bare count words (0x3, 0x5) appear to delimit sections
 * interpreted by the RLC microcode — confirm against the RLC firmware
 * interface, which is not visible in this file.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* section marker — see NOTE above */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* section marker — entries below carry no value slot */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
717 
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins) GFX.
 * Same pair format as the Spectre list above: first word is a packed
 * (GRBM_GFX_INDEX selector << 16 | register dword offset), second word is a
 * placeholder value slot.  Kalindi's smaller GPU needs fewer per-instance
 * entries than Spectre.
 * NOTE(review): the bare count words (0x3, 0x5) appear to delimit sections
 * interpreted by the RLC microcode — confirm against the RLC firmware
 * interface, which is not visible in this file.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* section marker — see NOTE above */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* section marker — entries below carry no value slot */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
1042 
/*
 * Golden register table: { register byte offset, and-mask, or-value }
 * triplets.  NOTE(review): format matches the radeon golden-register
 * sequences applied at init via read-modify-write — confirm against the
 * caller (outside this chunk).
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1047 
/*
 * Golden register table: { register byte offset, and-mask, or-value }
 * triplets.  Full-mask entries (0xffffffff) replace the register value
 * outright rather than read-modify-writing it.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1055 
/*
 * Golden register table for Bonaire: { register byte offset, and-mask,
 * or-value } triplets.  Only the bits set in the mask are updated.
 * Values come from AMD's recommended power-on settings for the ASIC.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1100 
/* Bonaire medium/coarse grain clockgating (MGCG/CGCG) init sequence:
 * {reg, and_mask, or_value} triplets. */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1186 
/* Spectre (Kaveri) SPM golden settings: {reg, and_mask, or_value} triplets. */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1191 
/* Spectre (Kaveri) common golden settings: {reg, and_mask, or_value} triplets. */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1199 
/* Spectre (Kaveri) golden register fixups: {reg, and_mask, or_value} triplets. */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1228 
/* Spectre (Kaveri) MGCG/CGCG clockgating init: {reg, and_mask, or_value}
 * triplets. */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1319 
/* Kalindi (Kabini) SPM golden settings: {reg, and_mask, or_value} triplets. */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1324 
/* Kalindi (Kabini) common golden settings: {reg, and_mask, or_value} triplets. */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1332 
/* Kalindi (Kabini) golden register fixups: {reg, and_mask, or_value} triplets. */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1366 
/* Kalindi (Kabini/Mullins) MGCG/CGCG clockgating init: {reg, and_mask,
 * or_value} triplets. */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1425 
/* Hawaii SPM golden settings: {reg, and_mask, or_value} triplets. */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1430 
/* Hawaii common golden settings: {reg, and_mask, or_value} triplets. */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1439 
/* Hawaii golden register fixups: {reg, and_mask, or_value} triplets. */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1479 
/* Hawaii MGCG/CGCG clockgating init: {reg, and_mask, or_value} triplets. */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1590 
/* Godavari (Mullins) golden register fixups: {reg, and_mask, or_value}
 * triplets. */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): offset 0x98302 looks like a typo for 0x9834 — every
	 * sibling table pairs 0x9834 with this same mask/value. Confirm
	 * against AMD's released golden settings before changing. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1626 
1627 
1628 static void cik_init_golden_registers(struct radeon_device *rdev)
1629 {
1630 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1631 	mutex_lock(&rdev->grbm_idx_mutex);
1632 	switch (rdev->family) {
1633 	case CHIP_BONAIRE:
1634 		radeon_program_register_sequence(rdev,
1635 						 bonaire_mgcg_cgcg_init,
1636 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1637 		radeon_program_register_sequence(rdev,
1638 						 bonaire_golden_registers,
1639 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1640 		radeon_program_register_sequence(rdev,
1641 						 bonaire_golden_common_registers,
1642 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1643 		radeon_program_register_sequence(rdev,
1644 						 bonaire_golden_spm_registers,
1645 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1646 		break;
1647 	case CHIP_KABINI:
1648 		radeon_program_register_sequence(rdev,
1649 						 kalindi_mgcg_cgcg_init,
1650 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1651 		radeon_program_register_sequence(rdev,
1652 						 kalindi_golden_registers,
1653 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1654 		radeon_program_register_sequence(rdev,
1655 						 kalindi_golden_common_registers,
1656 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1657 		radeon_program_register_sequence(rdev,
1658 						 kalindi_golden_spm_registers,
1659 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1660 		break;
1661 	case CHIP_MULLINS:
1662 		radeon_program_register_sequence(rdev,
1663 						 kalindi_mgcg_cgcg_init,
1664 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1665 		radeon_program_register_sequence(rdev,
1666 						 godavari_golden_registers,
1667 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1668 		radeon_program_register_sequence(rdev,
1669 						 kalindi_golden_common_registers,
1670 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1671 		radeon_program_register_sequence(rdev,
1672 						 kalindi_golden_spm_registers,
1673 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1674 		break;
1675 	case CHIP_KAVERI:
1676 		radeon_program_register_sequence(rdev,
1677 						 spectre_mgcg_cgcg_init,
1678 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1679 		radeon_program_register_sequence(rdev,
1680 						 spectre_golden_registers,
1681 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1682 		radeon_program_register_sequence(rdev,
1683 						 spectre_golden_common_registers,
1684 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1685 		radeon_program_register_sequence(rdev,
1686 						 spectre_golden_spm_registers,
1687 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1688 		break;
1689 	case CHIP_HAWAII:
1690 		radeon_program_register_sequence(rdev,
1691 						 hawaii_mgcg_cgcg_init,
1692 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1693 		radeon_program_register_sequence(rdev,
1694 						 hawaii_golden_registers,
1695 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1696 		radeon_program_register_sequence(rdev,
1697 						 hawaii_golden_common_registers,
1698 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1699 		radeon_program_register_sequence(rdev,
1700 						 hawaii_golden_spm_registers,
1701 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1702 		break;
1703 	default:
1704 		break;
1705 	}
1706 	mutex_unlock(&rdev->grbm_idx_mutex);
1707 }
1708 
1709 /**
1710  * cik_get_xclk - get the xclk
1711  *
1712  * @rdev: radeon_device pointer
1713  *
1714  * Returns the reference clock used by the gfx engine
1715  * (CIK).
1716  */
1717 u32 cik_get_xclk(struct radeon_device *rdev)
1718 {
1719 	u32 reference_clock = rdev->clock.spll.reference_freq;
1720 
1721 	if (rdev->flags & RADEON_IS_IGP) {
1722 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1723 			return reference_clock / 2;
1724 	} else {
1725 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1726 			return reference_clock / 4;
1727 	}
1728 	return reference_clock;
1729 }
1730 
1731 /**
1732  * cik_mm_rdoorbell - read a doorbell dword
1733  *
1734  * @rdev: radeon_device pointer
1735  * @index: doorbell index
1736  *
1737  * Returns the value in the doorbell aperture at the
1738  * requested doorbell index (CIK).
1739  */
1740 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1741 {
1742 	if (index < rdev->doorbell.num_doorbells) {
1743 		return readl(rdev->doorbell.ptr + index);
1744 	} else {
1745 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1746 		return 0;
1747 	}
1748 }
1749 
1750 /**
1751  * cik_mm_wdoorbell - write a doorbell dword
1752  *
1753  * @rdev: radeon_device pointer
1754  * @index: doorbell index
1755  * @v: value to write
1756  *
1757  * Writes @v to the doorbell aperture at the
1758  * requested doorbell index (CIK).
1759  */
1760 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1761 {
1762 	if (index < rdev->doorbell.num_doorbells) {
1763 		writel(v, rdev->doorbell.ptr + index);
1764 	} else {
1765 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1766 	}
1767 }
1768 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io-debug {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs,
 * programmed by ci_mc_load_microcode() when using legacy firmware. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1810 
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC io-debug {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs,
 * programmed by ci_mc_load_microcode() when using legacy firmware. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1838 
1839 
1840 /**
1841  * cik_srbm_select - select specific register instances
1842  *
1843  * @rdev: radeon_device pointer
1844  * @me: selected ME (micro engine)
1845  * @pipe: pipe
1846  * @queue: queue
1847  * @vmid: VMID
1848  *
1849  * Switches the currently active registers instances.  Some
1850  * registers are instanced per VMID, others are instanced per
1851  * me/pipe/queue combination.
1852  */
1853 static void cik_srbm_select(struct radeon_device *rdev,
1854 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1855 {
1856 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1857 			     MEID(me & 0x3) |
1858 			     VMID(vmid & 0xf) |
1859 			     QUEUEID(queue & 0x7));
1860 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1861 }
1862 
1863 /* ucode loading */
1864 /**
1865  * ci_mc_load_microcode - load MC ucode into the hw
1866  *
1867  * @rdev: radeon_device pointer
1868  *
1869  * Load the GDDR MC ucode into the hw (CIK).
1870  * Returns 0 on success, error on failure.
1871  */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;		/* legacy fw: big-endian words */
	const __le32 *new_fw_data = NULL;	/* new-style fw: little-endian words */
	u32 running, tmp;
	u32 *io_mc_regs = NULL;			/* legacy fw: built-in io-debug table */
	const __le32 *new_io_mc_regs = NULL;	/* new-style fw: io-debug data from image */
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* New-style firmware: sizes and offsets come from the image
		 * header, both the io-debug table and the ucode payload. */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io-debug entries are (index, data) pairs of 32-bit words */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* Legacy firmware: whole image is ucode; the io-debug table
		 * is the per-ASIC array compiled into the driver. */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only (re)load the ucode when the MC engine is not running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* NOTE(review): extra io-debug writes keyed off PCI device
		 * 0x6649 and MC_SEQ_MISC0 bits 15:8 == 0x56 — presumably a
		 * board/memory-specific workaround; confirm before touching. */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1968 
1969 /**
1970  * cik_init_microcode - load ucode images from disk
1971  *
1972  * @rdev: radeon_device pointer
1973  *
1974  * Use the firmware interface to load the ucode images into
1975  * the driver (not loaded into hw).
1976  * Returns 0 on success, error on failure.
1977  */
1978 static int cik_init_microcode(struct radeon_device *rdev)
1979 {
1980 	const char *chip_name;
1981 	const char *new_chip_name;
1982 	size_t pfp_req_size, me_req_size, ce_req_size,
1983 		mec_req_size, rlc_req_size, mc_req_size = 0,
1984 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1985 	char fw_name[30];
1986 	int new_fw = 0;
1987 	int err;
1988 	int num_fw;
1989 	bool new_smc = false;
1990 
1991 	DRM_DEBUG("\n");
1992 
1993 	switch (rdev->family) {
1994 	case CHIP_BONAIRE:
1995 		chip_name = "BONAIRE";
1996 		if ((rdev->pdev->revision == 0x80) ||
1997 		    (rdev->pdev->revision == 0x81) ||
1998 		    (rdev->pdev->device == 0x665f))
1999 			new_smc = true;
2000 		new_chip_name = "bonaire";
2001 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2002 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2003 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2004 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2005 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2006 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2007 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2008 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2009 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2010 		num_fw = 8;
2011 		break;
2012 	case CHIP_HAWAII:
2013 		chip_name = "HAWAII";
2014 		if (rdev->pdev->revision == 0x80)
2015 			new_smc = true;
2016 		new_chip_name = "hawaii";
2017 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2018 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2019 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2020 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2021 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2022 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2023 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2024 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2026 		num_fw = 8;
2027 		break;
2028 	case CHIP_KAVERI:
2029 		chip_name = "KAVERI";
2030 		new_chip_name = "kaveri";
2031 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2032 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2033 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2034 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2035 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2036 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2037 		num_fw = 7;
2038 		break;
2039 	case CHIP_KABINI:
2040 		chip_name = "KABINI";
2041 		new_chip_name = "kabini";
2042 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2043 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2044 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2045 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2046 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2047 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2048 		num_fw = 6;
2049 		break;
2050 	case CHIP_MULLINS:
2051 		chip_name = "MULLINS";
2052 		new_chip_name = "mullins";
2053 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2054 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2055 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2056 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2057 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2058 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2059 		num_fw = 6;
2060 		break;
2061 	default: BUG();
2062 	}
2063 
2064 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2065 
2066 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2067 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068 	if (err) {
2069 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2070 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2071 		if (err)
2072 			goto out;
2073 		if (rdev->pfp_fw->size != pfp_req_size) {
2074 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2075 			       rdev->pfp_fw->size, fw_name);
2076 			err = -EINVAL;
2077 			goto out;
2078 		}
2079 	} else {
2080 		err = radeon_ucode_validate(rdev->pfp_fw);
2081 		if (err) {
2082 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2083 			       fw_name);
2084 			goto out;
2085 		} else {
2086 			new_fw++;
2087 		}
2088 	}
2089 
2090 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2091 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092 	if (err) {
2093 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2094 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2095 		if (err)
2096 			goto out;
2097 		if (rdev->me_fw->size != me_req_size) {
2098 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->me_fw->size, fw_name);
2100 			err = -EINVAL;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->me_fw);
2104 		if (err) {
2105 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2106 			       fw_name);
2107 			goto out;
2108 		} else {
2109 			new_fw++;
2110 		}
2111 	}
2112 
2113 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2114 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2115 	if (err) {
2116 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2117 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2118 		if (err)
2119 			goto out;
2120 		if (rdev->ce_fw->size != ce_req_size) {
2121 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2122 			       rdev->ce_fw->size, fw_name);
2123 			err = -EINVAL;
2124 		}
2125 	} else {
2126 		err = radeon_ucode_validate(rdev->ce_fw);
2127 		if (err) {
2128 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2129 			       fw_name);
2130 			goto out;
2131 		} else {
2132 			new_fw++;
2133 		}
2134 	}
2135 
2136 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2137 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2138 	if (err) {
2139 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2140 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141 		if (err)
2142 			goto out;
2143 		if (rdev->mec_fw->size != mec_req_size) {
2144 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2145 			       rdev->mec_fw->size, fw_name);
2146 			err = -EINVAL;
2147 		}
2148 	} else {
2149 		err = radeon_ucode_validate(rdev->mec_fw);
2150 		if (err) {
2151 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2152 			       fw_name);
2153 			goto out;
2154 		} else {
2155 			new_fw++;
2156 		}
2157 	}
2158 
2159 	if (rdev->family == CHIP_KAVERI) {
2160 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2161 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2162 		if (err) {
2163 			goto out;
2164 		} else {
2165 			err = radeon_ucode_validate(rdev->mec2_fw);
2166 			if (err) {
2167 				goto out;
2168 			} else {
2169 				new_fw++;
2170 			}
2171 		}
2172 	}
2173 
2174 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2175 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2176 	if (err) {
2177 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2178 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2179 		if (err)
2180 			goto out;
2181 		if (rdev->rlc_fw->size != rlc_req_size) {
2182 			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2183 			       rdev->rlc_fw->size, fw_name);
2184 			err = -EINVAL;
2185 		}
2186 	} else {
2187 		err = radeon_ucode_validate(rdev->rlc_fw);
2188 		if (err) {
2189 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2190 			       fw_name);
2191 			goto out;
2192 		} else {
2193 			new_fw++;
2194 		}
2195 	}
2196 
2197 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2198 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2199 	if (err) {
2200 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2201 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2202 		if (err)
2203 			goto out;
2204 		if (rdev->sdma_fw->size != sdma_req_size) {
2205 			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2206 			       rdev->sdma_fw->size, fw_name);
2207 			err = -EINVAL;
2208 		}
2209 	} else {
2210 		err = radeon_ucode_validate(rdev->sdma_fw);
2211 		if (err) {
2212 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2213 			       fw_name);
2214 			goto out;
2215 		} else {
2216 			new_fw++;
2217 		}
2218 	}
2219 
2220 	/* No SMC, MC ucode on APUs */
2221 	if (!(rdev->flags & RADEON_IS_IGP)) {
2222 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2223 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2224 		if (err) {
2225 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2226 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2227 			if (err) {
2228 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2229 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2230 				if (err)
2231 					goto out;
2232 			}
2233 			if ((rdev->mc_fw->size != mc_req_size) &&
2234 			    (rdev->mc_fw->size != mc2_req_size)){
2235 				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2236 				       rdev->mc_fw->size, fw_name);
2237 				err = -EINVAL;
2238 			}
2239 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2240 		} else {
2241 			err = radeon_ucode_validate(rdev->mc_fw);
2242 			if (err) {
2243 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2244 				       fw_name);
2245 				goto out;
2246 			} else {
2247 				new_fw++;
2248 			}
2249 		}
2250 
2251 		if (new_smc)
2252 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2253 		else
2254 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2255 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2256 		if (err) {
2257 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2258 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2259 			if (err) {
2260 				pr_err("smc: error loading firmware \"%s\"\n",
2261 				       fw_name);
2262 				release_firmware(rdev->smc_fw);
2263 				rdev->smc_fw = NULL;
2264 				err = 0;
2265 			} else if (rdev->smc_fw->size != smc_req_size) {
2266 				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2267 				       rdev->smc_fw->size, fw_name);
2268 				err = -EINVAL;
2269 			}
2270 		} else {
2271 			err = radeon_ucode_validate(rdev->smc_fw);
2272 			if (err) {
2273 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2274 				       fw_name);
2275 				goto out;
2276 			} else {
2277 				new_fw++;
2278 			}
2279 		}
2280 	}
2281 
2282 	if (new_fw == 0) {
2283 		rdev->new_fw = false;
2284 	} else if (new_fw < num_fw) {
2285 		pr_err("ci_fw: mixing new and old firmware!\n");
2286 		err = -EINVAL;
2287 	} else {
2288 		rdev->new_fw = true;
2289 	}
2290 
2291 out:
2292 	if (err) {
2293 		if (err != -EINVAL)
2294 			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2295 			       fw_name);
2296 		release_firmware(rdev->pfp_fw);
2297 		rdev->pfp_fw = NULL;
2298 		release_firmware(rdev->me_fw);
2299 		rdev->me_fw = NULL;
2300 		release_firmware(rdev->ce_fw);
2301 		rdev->ce_fw = NULL;
2302 		release_firmware(rdev->mec_fw);
2303 		rdev->mec_fw = NULL;
2304 		release_firmware(rdev->mec2_fw);
2305 		rdev->mec2_fw = NULL;
2306 		release_firmware(rdev->rlc_fw);
2307 		rdev->rlc_fw = NULL;
2308 		release_firmware(rdev->sdma_fw);
2309 		rdev->sdma_fw = NULL;
2310 		release_firmware(rdev->mc_fw);
2311 		rdev->mc_fw = NULL;
2312 		release_firmware(rdev->smc_fw);
2313 		rdev->smc_fw = NULL;
2314 	}
2315 	return err;
2316 }
2317 
2318 /*
2319  * Core functions
2320  */
2321 /**
2322  * cik_tiling_mode_table_init - init the hw tiling table
2323  *
2324  * @rdev: radeon_device pointer
2325  *
2326  * Starting with SI, the tiling setup is done globally in a
2327  * set of 32 tiling modes.  Rather than selecting each set of
2328  * parameters per surface as on older asics, we just select
2329  * which index in the tiling table we want to use, and the
2330  * surface uses those parameters (CIK).
2331  */
2332 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2333 {
2334 	u32 *tile = rdev->config.cik.tile_mode_array;
2335 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2336 	const u32 num_tile_mode_states =
2337 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2338 	const u32 num_secondary_tile_mode_states =
2339 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2340 	u32 reg_offset, split_equal_to_row_size;
2341 	u32 num_pipe_configs;
2342 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2343 		rdev->config.cik.max_shader_engines;
2344 
2345 	switch (rdev->config.cik.mem_row_size_in_kb) {
2346 	case 1:
2347 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2348 		break;
2349 	case 2:
2350 	default:
2351 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2352 		break;
2353 	case 4:
2354 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2355 		break;
2356 	}
2357 
2358 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2359 	if (num_pipe_configs > 8)
2360 		num_pipe_configs = 16;
2361 
2362 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2363 		tile[reg_offset] = 0;
2364 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2365 		macrotile[reg_offset] = 0;
2366 
2367 	switch(num_pipe_configs) {
2368 	case 16:
2369 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2373 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2377 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2379 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2381 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2383 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2385 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 			   TILE_SPLIT(split_equal_to_row_size));
2389 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2392 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2393 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2396 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2397 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2398 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399 			   TILE_SPLIT(split_equal_to_row_size));
2400 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2401 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2402 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2405 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2407 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2410 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2411 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2412 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2413 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2414 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2415 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2418 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2420 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2426 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2427 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2429 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2433 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2435 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2436 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2437 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2439 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2442 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2444 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2445 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2446 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447 
2448 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451 			   NUM_BANKS(ADDR_SURF_16_BANK));
2452 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2454 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455 			   NUM_BANKS(ADDR_SURF_16_BANK));
2456 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459 			   NUM_BANKS(ADDR_SURF_16_BANK));
2460 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463 			   NUM_BANKS(ADDR_SURF_16_BANK));
2464 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467 			   NUM_BANKS(ADDR_SURF_8_BANK));
2468 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471 			   NUM_BANKS(ADDR_SURF_4_BANK));
2472 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475 			   NUM_BANKS(ADDR_SURF_2_BANK));
2476 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479 			   NUM_BANKS(ADDR_SURF_16_BANK));
2480 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2482 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2483 			   NUM_BANKS(ADDR_SURF_16_BANK));
2484 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487 			    NUM_BANKS(ADDR_SURF_16_BANK));
2488 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491 			    NUM_BANKS(ADDR_SURF_8_BANK));
2492 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2495 			    NUM_BANKS(ADDR_SURF_4_BANK));
2496 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499 			    NUM_BANKS(ADDR_SURF_2_BANK));
2500 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503 			    NUM_BANKS(ADDR_SURF_2_BANK));
2504 
2505 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2506 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2507 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2508 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2509 		break;
2510 
2511 	case 8:
2512 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2516 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2520 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2522 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2524 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2528 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 			   TILE_SPLIT(split_equal_to_row_size));
2532 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2536 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2537 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2539 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2540 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2541 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542 			   TILE_SPLIT(split_equal_to_row_size));
2543 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2544 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2545 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2548 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2550 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2553 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2554 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2555 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2556 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2557 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2558 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2560 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2561 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2563 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2567 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2568 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2570 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2571 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2572 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2575 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2576 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2578 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2580 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2582 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2583 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2584 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2585 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2586 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2587 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2590 
2591 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2593 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2594 				NUM_BANKS(ADDR_SURF_16_BANK));
2595 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2597 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2598 				NUM_BANKS(ADDR_SURF_16_BANK));
2599 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2602 				NUM_BANKS(ADDR_SURF_16_BANK));
2603 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2606 				NUM_BANKS(ADDR_SURF_16_BANK));
2607 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2609 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2610 				NUM_BANKS(ADDR_SURF_8_BANK));
2611 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2613 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2614 				NUM_BANKS(ADDR_SURF_4_BANK));
2615 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2618 				NUM_BANKS(ADDR_SURF_2_BANK));
2619 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2621 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2622 				NUM_BANKS(ADDR_SURF_16_BANK));
2623 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2625 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2626 				NUM_BANKS(ADDR_SURF_16_BANK));
2627 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2629 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2630 				NUM_BANKS(ADDR_SURF_16_BANK));
2631 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2634 				NUM_BANKS(ADDR_SURF_16_BANK));
2635 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2637 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2638 				NUM_BANKS(ADDR_SURF_8_BANK));
2639 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642 				NUM_BANKS(ADDR_SURF_4_BANK));
2643 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2646 				NUM_BANKS(ADDR_SURF_2_BANK));
2647 
2648 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2649 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2650 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2651 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2652 		break;
2653 
2654 	case 4:
2655 		if (num_rbs == 4) {
2656 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2660 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2664 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2666 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2668 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2672 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 			   TILE_SPLIT(split_equal_to_row_size));
2676 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2677 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2679 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2680 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2681 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2683 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2684 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2685 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686 			   TILE_SPLIT(split_equal_to_row_size));
2687 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2688 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2689 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2692 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2694 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2697 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2699 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2700 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2701 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2704 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2707 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2716 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2717 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2722 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2723 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2728 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2731 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2732 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734 
2735 		} else if (num_rbs < 4) {
2736 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2740 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2744 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2746 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2748 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2750 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2752 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755 			   TILE_SPLIT(split_equal_to_row_size));
2756 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2757 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2759 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2760 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2761 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2762 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2763 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2764 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2765 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2766 			   TILE_SPLIT(split_equal_to_row_size));
2767 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2768 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2769 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2770 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2772 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2773 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2774 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2775 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2776 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2777 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2778 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2779 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2780 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2781 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2782 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2784 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2785 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2787 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2789 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2790 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2791 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2792 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2793 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2794 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2796 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2797 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2800 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2802 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2803 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2804 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2805 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2806 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2807 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2808 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2809 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2810 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2811 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2812 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2814 		}
2815 
2816 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2819 				NUM_BANKS(ADDR_SURF_16_BANK));
2820 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2822 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2823 				NUM_BANKS(ADDR_SURF_16_BANK));
2824 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827 				NUM_BANKS(ADDR_SURF_16_BANK));
2828 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831 				NUM_BANKS(ADDR_SURF_16_BANK));
2832 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2834 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2835 				NUM_BANKS(ADDR_SURF_16_BANK));
2836 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839 				NUM_BANKS(ADDR_SURF_8_BANK));
2840 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2843 				NUM_BANKS(ADDR_SURF_4_BANK));
2844 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2845 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2846 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847 				NUM_BANKS(ADDR_SURF_16_BANK));
2848 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2849 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851 				NUM_BANKS(ADDR_SURF_16_BANK));
2852 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2854 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2855 				NUM_BANKS(ADDR_SURF_16_BANK));
2856 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2858 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859 				NUM_BANKS(ADDR_SURF_16_BANK));
2860 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2862 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2863 				NUM_BANKS(ADDR_SURF_16_BANK));
2864 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2866 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2867 				NUM_BANKS(ADDR_SURF_8_BANK));
2868 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2871 				NUM_BANKS(ADDR_SURF_4_BANK));
2872 
2873 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2874 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2875 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2876 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2877 		break;
2878 
2879 	case 2:
2880 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882 			   PIPE_CONFIG(ADDR_SURF_P2) |
2883 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2884 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886 			   PIPE_CONFIG(ADDR_SURF_P2) |
2887 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2888 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2890 			   PIPE_CONFIG(ADDR_SURF_P2) |
2891 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2892 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894 			   PIPE_CONFIG(ADDR_SURF_P2) |
2895 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2896 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898 			   PIPE_CONFIG(ADDR_SURF_P2) |
2899 			   TILE_SPLIT(split_equal_to_row_size));
2900 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2901 			   PIPE_CONFIG(ADDR_SURF_P2) |
2902 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2904 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2905 			   PIPE_CONFIG(ADDR_SURF_P2) |
2906 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2907 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2908 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2909 			   PIPE_CONFIG(ADDR_SURF_P2) |
2910 			   TILE_SPLIT(split_equal_to_row_size));
2911 		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2912 			   PIPE_CONFIG(ADDR_SURF_P2);
2913 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2914 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915 			   PIPE_CONFIG(ADDR_SURF_P2));
2916 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2918 			    PIPE_CONFIG(ADDR_SURF_P2) |
2919 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2920 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2922 			    PIPE_CONFIG(ADDR_SURF_P2) |
2923 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2924 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2925 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2926 			    PIPE_CONFIG(ADDR_SURF_P2) |
2927 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2928 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2929 			    PIPE_CONFIG(ADDR_SURF_P2) |
2930 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2931 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2932 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2933 			    PIPE_CONFIG(ADDR_SURF_P2) |
2934 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2935 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2936 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2937 			    PIPE_CONFIG(ADDR_SURF_P2) |
2938 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2940 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2941 			    PIPE_CONFIG(ADDR_SURF_P2) |
2942 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2944 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2945 			    PIPE_CONFIG(ADDR_SURF_P2));
2946 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2947 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2948 			    PIPE_CONFIG(ADDR_SURF_P2) |
2949 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2951 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2952 			    PIPE_CONFIG(ADDR_SURF_P2) |
2953 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2955 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2956 			    PIPE_CONFIG(ADDR_SURF_P2) |
2957 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958 
2959 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2960 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2961 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962 				NUM_BANKS(ADDR_SURF_16_BANK));
2963 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2964 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2965 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966 				NUM_BANKS(ADDR_SURF_16_BANK));
2967 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2969 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970 				NUM_BANKS(ADDR_SURF_16_BANK));
2971 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2974 				NUM_BANKS(ADDR_SURF_16_BANK));
2975 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2977 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978 				NUM_BANKS(ADDR_SURF_16_BANK));
2979 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2981 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982 				NUM_BANKS(ADDR_SURF_16_BANK));
2983 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2986 				NUM_BANKS(ADDR_SURF_8_BANK));
2987 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2988 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2989 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990 				NUM_BANKS(ADDR_SURF_16_BANK));
2991 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2992 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2993 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994 				NUM_BANKS(ADDR_SURF_16_BANK));
2995 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2996 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2997 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998 				NUM_BANKS(ADDR_SURF_16_BANK));
2999 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3000 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3001 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002 				NUM_BANKS(ADDR_SURF_16_BANK));
3003 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3005 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006 				NUM_BANKS(ADDR_SURF_16_BANK));
3007 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3009 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010 				NUM_BANKS(ADDR_SURF_16_BANK));
3011 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3012 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3013 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3014 				NUM_BANKS(ADDR_SURF_8_BANK));
3015 
3016 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3017 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3018 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3019 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3020 		break;
3021 
3022 	default:
3023 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3024 	}
3025 }
3026 
3027 /**
3028  * cik_select_se_sh - select which SE, SH to address
3029  *
3030  * @rdev: radeon_device pointer
3031  * @se_num: shader engine to address
3032  * @sh_num: sh block to address
3033  *
3034  * Select which SE, SH combinations to address. Certain
3035  * registers are instanced per SE or SH.  0xffffffff means
3036  * broadcast to all SEs or SHs (CIK).
3037  */
3038 static void cik_select_se_sh(struct radeon_device *rdev,
3039 			     u32 se_num, u32 sh_num)
3040 {
3041 	u32 data = INSTANCE_BROADCAST_WRITES;
3042 
3043 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3044 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3045 	else if (se_num == 0xffffffff)
3046 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3047 	else if (sh_num == 0xffffffff)
3048 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3049 	else
3050 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3051 	WREG32(GRBM_GFX_INDEX, data);
3052 }
3053 
3054 /**
3055  * cik_create_bitmask - create a bitmask
3056  *
3057  * @bit_width: length of the mask
3058  *
3059  * create a variable length bit mask (CIK).
3060  * Returns the bitmask.
3061  */
3062 static u32 cik_create_bitmask(u32 bit_width)
3063 {
3064 	u32 i, mask = 0;
3065 
3066 	for (i = 0; i < bit_width; i++) {
3067 		mask <<= 1;
3068 		mask |= 1;
3069 	}
3070 	return mask;
3071 }
3072 
3073 /**
3074  * cik_get_rb_disabled - computes the mask of disabled RBs
3075  *
3076  * @rdev: radeon_device pointer
3077  * @max_rb_num: max RBs (render backends) for the asic
3078  * @se_num: number of SEs (shader engines) for the asic
3079  * @sh_per_se: number of SH blocks per SE for the asic
3080  *
3081  * Calculates the bitmask of disabled RBs (CIK).
3082  * Returns the disabled RB bitmask.
3083  */
3084 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3085 			      u32 max_rb_num_per_se,
3086 			      u32 sh_per_se)
3087 {
3088 	u32 data, mask;
3089 
3090 	data = RREG32(CC_RB_BACKEND_DISABLE);
3091 	if (data & 1)
3092 		data &= BACKEND_DISABLE_MASK;
3093 	else
3094 		data = 0;
3095 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3096 
3097 	data >>= BACKEND_DISABLE_SHIFT;
3098 
3099 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3100 
3101 	return data & mask;
3102 }
3103 
3104 /**
3105  * cik_setup_rb - setup the RBs on the asic
3106  *
3107  * @rdev: radeon_device pointer
3108  * @se_num: number of SEs (shader engines) for the asic
3109  * @sh_per_se: number of SH blocks per SE for the asic
3110  * @max_rb_num: max RBs (render backends) for the asic
3111  *
3112  * Configures per-SE/SH RB registers (CIK).
3113  */
3114 static void cik_setup_rb(struct radeon_device *rdev,
3115 			 u32 se_num, u32 sh_per_se,
3116 			 u32 max_rb_num_per_se)
3117 {
3118 	int i, j;
3119 	u32 data, mask;
3120 	u32 disabled_rbs = 0;
3121 	u32 enabled_rbs = 0;
3122 
3123 	mutex_lock(&rdev->grbm_idx_mutex);
3124 	for (i = 0; i < se_num; i++) {
3125 		for (j = 0; j < sh_per_se; j++) {
3126 			cik_select_se_sh(rdev, i, j);
3127 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3128 			if (rdev->family == CHIP_HAWAII)
3129 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3130 			else
3131 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3132 		}
3133 	}
3134 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3135 	mutex_unlock(&rdev->grbm_idx_mutex);
3136 
3137 	mask = 1;
3138 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3139 		if (!(disabled_rbs & mask))
3140 			enabled_rbs |= mask;
3141 		mask <<= 1;
3142 	}
3143 
3144 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3145 
3146 	mutex_lock(&rdev->grbm_idx_mutex);
3147 	for (i = 0; i < se_num; i++) {
3148 		cik_select_se_sh(rdev, i, 0xffffffff);
3149 		data = 0;
3150 		for (j = 0; j < sh_per_se; j++) {
3151 			switch (enabled_rbs & 3) {
3152 			case 0:
3153 				if (j == 0)
3154 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3155 				else
3156 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3157 				break;
3158 			case 1:
3159 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3160 				break;
3161 			case 2:
3162 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3163 				break;
3164 			case 3:
3165 			default:
3166 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3167 				break;
3168 			}
3169 			enabled_rbs >>= 2;
3170 		}
3171 		WREG32(PA_SC_RASTER_CONFIG, data);
3172 	}
3173 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3174 	mutex_unlock(&rdev->grbm_idx_mutex);
3175 }
3176 
3177 /**
3178  * cik_gpu_init - setup the 3D engine
3179  *
3180  * @rdev: radeon_device pointer
3181  *
3182  * Configures the 3D engine and tiling configuration
3183  * registers so that the 3D engine is usable.
3184  */
3185 static void cik_gpu_init(struct radeon_device *rdev)
3186 {
3187 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3188 	u32 mc_shared_chmap, mc_arb_ramcfg;
3189 	u32 hdp_host_path_cntl;
3190 	u32 tmp;
3191 	int i, j;
3192 
3193 	switch (rdev->family) {
3194 	case CHIP_BONAIRE:
3195 		rdev->config.cik.max_shader_engines = 2;
3196 		rdev->config.cik.max_tile_pipes = 4;
3197 		rdev->config.cik.max_cu_per_sh = 7;
3198 		rdev->config.cik.max_sh_per_se = 1;
3199 		rdev->config.cik.max_backends_per_se = 2;
3200 		rdev->config.cik.max_texture_channel_caches = 4;
3201 		rdev->config.cik.max_gprs = 256;
3202 		rdev->config.cik.max_gs_threads = 32;
3203 		rdev->config.cik.max_hw_contexts = 8;
3204 
3205 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3206 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3207 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3208 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3209 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3210 		break;
3211 	case CHIP_HAWAII:
3212 		rdev->config.cik.max_shader_engines = 4;
3213 		rdev->config.cik.max_tile_pipes = 16;
3214 		rdev->config.cik.max_cu_per_sh = 11;
3215 		rdev->config.cik.max_sh_per_se = 1;
3216 		rdev->config.cik.max_backends_per_se = 4;
3217 		rdev->config.cik.max_texture_channel_caches = 16;
3218 		rdev->config.cik.max_gprs = 256;
3219 		rdev->config.cik.max_gs_threads = 32;
3220 		rdev->config.cik.max_hw_contexts = 8;
3221 
3222 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3223 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3224 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3225 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3226 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3227 		break;
3228 	case CHIP_KAVERI:
3229 		rdev->config.cik.max_shader_engines = 1;
3230 		rdev->config.cik.max_tile_pipes = 4;
3231 		if ((rdev->pdev->device == 0x1304) ||
3232 		    (rdev->pdev->device == 0x1305) ||
3233 		    (rdev->pdev->device == 0x130C) ||
3234 		    (rdev->pdev->device == 0x130F) ||
3235 		    (rdev->pdev->device == 0x1310) ||
3236 		    (rdev->pdev->device == 0x1311) ||
3237 		    (rdev->pdev->device == 0x131C)) {
3238 			rdev->config.cik.max_cu_per_sh = 8;
3239 			rdev->config.cik.max_backends_per_se = 2;
3240 		} else if ((rdev->pdev->device == 0x1309) ||
3241 			   (rdev->pdev->device == 0x130A) ||
3242 			   (rdev->pdev->device == 0x130D) ||
3243 			   (rdev->pdev->device == 0x1313) ||
3244 			   (rdev->pdev->device == 0x131D)) {
3245 			rdev->config.cik.max_cu_per_sh = 6;
3246 			rdev->config.cik.max_backends_per_se = 2;
3247 		} else if ((rdev->pdev->device == 0x1306) ||
3248 			   (rdev->pdev->device == 0x1307) ||
3249 			   (rdev->pdev->device == 0x130B) ||
3250 			   (rdev->pdev->device == 0x130E) ||
3251 			   (rdev->pdev->device == 0x1315) ||
3252 			   (rdev->pdev->device == 0x1318) ||
3253 			   (rdev->pdev->device == 0x131B)) {
3254 			rdev->config.cik.max_cu_per_sh = 4;
3255 			rdev->config.cik.max_backends_per_se = 1;
3256 		} else {
3257 			rdev->config.cik.max_cu_per_sh = 3;
3258 			rdev->config.cik.max_backends_per_se = 1;
3259 		}
3260 		rdev->config.cik.max_sh_per_se = 1;
3261 		rdev->config.cik.max_texture_channel_caches = 4;
3262 		rdev->config.cik.max_gprs = 256;
3263 		rdev->config.cik.max_gs_threads = 16;
3264 		rdev->config.cik.max_hw_contexts = 8;
3265 
3266 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3267 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3268 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3269 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3270 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3271 		break;
3272 	case CHIP_KABINI:
3273 	case CHIP_MULLINS:
3274 	default:
3275 		rdev->config.cik.max_shader_engines = 1;
3276 		rdev->config.cik.max_tile_pipes = 2;
3277 		rdev->config.cik.max_cu_per_sh = 2;
3278 		rdev->config.cik.max_sh_per_se = 1;
3279 		rdev->config.cik.max_backends_per_se = 1;
3280 		rdev->config.cik.max_texture_channel_caches = 2;
3281 		rdev->config.cik.max_gprs = 256;
3282 		rdev->config.cik.max_gs_threads = 16;
3283 		rdev->config.cik.max_hw_contexts = 8;
3284 
3285 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3286 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3287 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3288 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3289 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3290 		break;
3291 	}
3292 
3293 	/* Initialize HDP */
3294 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3295 		WREG32((0x2c14 + j), 0x00000000);
3296 		WREG32((0x2c18 + j), 0x00000000);
3297 		WREG32((0x2c1c + j), 0x00000000);
3298 		WREG32((0x2c20 + j), 0x00000000);
3299 		WREG32((0x2c24 + j), 0x00000000);
3300 	}
3301 
3302 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3303 	WREG32(SRBM_INT_CNTL, 0x1);
3304 	WREG32(SRBM_INT_ACK, 0x1);
3305 
3306 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3307 
3308 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3309 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3310 
3311 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3312 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3313 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3314 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3315 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3316 		rdev->config.cik.mem_row_size_in_kb = 4;
3317 	/* XXX use MC settings? */
3318 	rdev->config.cik.shader_engine_tile_size = 32;
3319 	rdev->config.cik.num_gpus = 1;
3320 	rdev->config.cik.multi_gpu_tile_size = 64;
3321 
3322 	/* fix up row size */
3323 	gb_addr_config &= ~ROW_SIZE_MASK;
3324 	switch (rdev->config.cik.mem_row_size_in_kb) {
3325 	case 1:
3326 	default:
3327 		gb_addr_config |= ROW_SIZE(0);
3328 		break;
3329 	case 2:
3330 		gb_addr_config |= ROW_SIZE(1);
3331 		break;
3332 	case 4:
3333 		gb_addr_config |= ROW_SIZE(2);
3334 		break;
3335 	}
3336 
3337 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3338 	 * not have bank info, so create a custom tiling dword.
3339 	 * bits 3:0   num_pipes
3340 	 * bits 7:4   num_banks
3341 	 * bits 11:8  group_size
3342 	 * bits 15:12 row_size
3343 	 */
3344 	rdev->config.cik.tile_config = 0;
3345 	switch (rdev->config.cik.num_tile_pipes) {
3346 	case 1:
3347 		rdev->config.cik.tile_config |= (0 << 0);
3348 		break;
3349 	case 2:
3350 		rdev->config.cik.tile_config |= (1 << 0);
3351 		break;
3352 	case 4:
3353 		rdev->config.cik.tile_config |= (2 << 0);
3354 		break;
3355 	case 8:
3356 	default:
3357 		/* XXX what about 12? */
3358 		rdev->config.cik.tile_config |= (3 << 0);
3359 		break;
3360 	}
3361 	rdev->config.cik.tile_config |=
3362 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3363 	rdev->config.cik.tile_config |=
3364 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3365 	rdev->config.cik.tile_config |=
3366 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3367 
3368 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3369 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3370 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3371 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3372 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3373 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3374 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3375 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3376 
3377 	cik_tiling_mode_table_init(rdev);
3378 
3379 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3380 		     rdev->config.cik.max_sh_per_se,
3381 		     rdev->config.cik.max_backends_per_se);
3382 
3383 	rdev->config.cik.active_cus = 0;
3384 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3385 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3386 			rdev->config.cik.active_cus +=
3387 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3388 		}
3389 	}
3390 
3391 	/* set HW defaults for 3D engine */
3392 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3393 
3394 	mutex_lock(&rdev->grbm_idx_mutex);
3395 	/*
3396 	 * making sure that the following register writes will be broadcasted
3397 	 * to all the shaders
3398 	 */
3399 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3400 	WREG32(SX_DEBUG_1, 0x20);
3401 
3402 	WREG32(TA_CNTL_AUX, 0x00010000);
3403 
3404 	tmp = RREG32(SPI_CONFIG_CNTL);
3405 	tmp |= 0x03000000;
3406 	WREG32(SPI_CONFIG_CNTL, tmp);
3407 
3408 	WREG32(SQ_CONFIG, 1);
3409 
3410 	WREG32(DB_DEBUG, 0);
3411 
3412 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3413 	tmp |= 0x00000400;
3414 	WREG32(DB_DEBUG2, tmp);
3415 
3416 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3417 	tmp |= 0x00020200;
3418 	WREG32(DB_DEBUG3, tmp);
3419 
3420 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3421 	tmp |= 0x00018208;
3422 	WREG32(CB_HW_CONTROL, tmp);
3423 
3424 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3425 
3426 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3427 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3428 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3429 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3430 
3431 	WREG32(VGT_NUM_INSTANCES, 1);
3432 
3433 	WREG32(CP_PERFMON_CNTL, 0);
3434 
3435 	WREG32(SQ_CONFIG, 0);
3436 
3437 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3438 					  FORCE_EOV_MAX_REZ_CNT(255)));
3439 
3440 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3441 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3442 
3443 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3444 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3445 
3446 	tmp = RREG32(HDP_MISC_CNTL);
3447 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3448 	WREG32(HDP_MISC_CNTL, tmp);
3449 
3450 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3451 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3452 
3453 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3454 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3455 	mutex_unlock(&rdev->grbm_idx_mutex);
3456 
3457 	udelay(50);
3458 }
3459 
3460 /*
3461  * GPU scratch registers helpers function.
3462  */
3463 /**
3464  * cik_scratch_init - setup driver info for CP scratch regs
3465  *
3466  * @rdev: radeon_device pointer
3467  *
3468  * Set up the number and offset of the CP scratch registers.
3469  * NOTE: use of CP scratch registers is a legacy inferface and
3470  * is not used by default on newer asics (r6xx+).  On newer asics,
3471  * memory buffers are used for fences rather than scratch regs.
3472  */
3473 static void cik_scratch_init(struct radeon_device *rdev)
3474 {
3475 	int i;
3476 
3477 	rdev->scratch.num_reg = 7;
3478 	rdev->scratch.reg_base = SCRATCH_REG0;
3479 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3480 		rdev->scratch.free[i] = true;
3481 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3482 	}
3483 }
3484 
3485 /**
3486  * cik_ring_test - basic gfx ring test
3487  *
3488  * @rdev: radeon_device pointer
3489  * @ring: radeon_ring structure holding ring information
3490  *
3491  * Allocate a scratch register and write to it using the gfx ring (CIK).
3492  * Provides a basic gfx ring test to verify that the ring is working.
3493  * Used by cik_cp_gfx_resume();
3494  * Returns 0 on success, error on failure.
3495  */
3496 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3497 {
3498 	uint32_t scratch;
3499 	uint32_t tmp = 0;
3500 	unsigned i;
3501 	int r;
3502 
3503 	r = radeon_scratch_get(rdev, &scratch);
3504 	if (r) {
3505 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3506 		return r;
3507 	}
3508 	WREG32(scratch, 0xCAFEDEAD);
3509 	r = radeon_ring_lock(rdev, ring, 3);
3510 	if (r) {
3511 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3512 		radeon_scratch_free(rdev, scratch);
3513 		return r;
3514 	}
3515 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3516 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3517 	radeon_ring_write(ring, 0xDEADBEEF);
3518 	radeon_ring_unlock_commit(rdev, ring, false);
3519 
3520 	for (i = 0; i < rdev->usec_timeout; i++) {
3521 		tmp = RREG32(scratch);
3522 		if (tmp == 0xDEADBEEF)
3523 			break;
3524 		DRM_UDELAY(1);
3525 	}
3526 	if (i < rdev->usec_timeout) {
3527 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3528 	} else {
3529 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3530 			  ring->idx, scratch, tmp);
3531 		r = -EINVAL;
3532 	}
3533 	radeon_scratch_free(rdev, scratch);
3534 	return r;
3535 }
3536 
/**
 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 *
 * Emits an hdp flush on the cp.
 */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH_REQ/DONE bit for this CP: CP0 for the
	 * gfx ring; for compute, a per-ME base (CP2 or CP6) shifted by
	 * the pipe number.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* unknown ME: no sensible flush bit, bail out */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* Request the HDP flush, then poll FLUSH_DONE until the masked
	 * bit equals the reference value.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3581 
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address the fence sequence number is written to */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* dummy event uses INT_SEL(0), unlike the real one below */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3622 
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address the fence sequence number is written to */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3650 
3651 /**
3652  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3653  *
3654  * @rdev: radeon_device pointer
3655  * @ring: radeon ring buffer object
3656  * @semaphore: radeon semaphore object
3657  * @emit_wait: Is this a sempahore wait?
3658  *
3659  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3660  * from running ahead of semaphore waits.
3661  */
3662 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3663 			     struct radeon_ring *ring,
3664 			     struct radeon_semaphore *semaphore,
3665 			     bool emit_wait)
3666 {
3667 	uint64_t addr = semaphore->gpu_addr;
3668 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3669 
3670 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3671 	radeon_ring_write(ring, lower_32_bits(addr));
3672 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3673 
3674 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3675 		/* Prevent the PFP from running ahead of the semaphore wait */
3676 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3677 		radeon_ring_write(ring, 0x0);
3678 	}
3679 
3680 	return true;
3681 }
3682 
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 * Returns the fence covering the copy, or an ERR_PTR on failure.
 */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* One DMA_DATA packet moves at most 0x1fffff bytes, so the copy
	 * is split into that many chunks.  Each chunk costs 7 ring dwords;
	 * 18 extra dwords cover the sync and fence emission.
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait for everything referenced by resv before starting the copy */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the final packet gets CP_SYNC */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3754 
3755 /*
3756  * IB stuff
3757  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* predicted rptr: wptr plus this 3-dword packet and
			 * the 4-dword IB packet emitted below
			 */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* same, via a 5-dword WRITE_DATA to the WB buffer */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords, VM id in bits 24+ */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3809 
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed the scratch reg so we can tell whether the IB ran. */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Three-dword IB that writes 0xDEADBEEF to the scratch register. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* Wait for the IB's fence before polling the scratch register. */
	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
		RADEON_USEC_IB_TEST_TIMEOUT));
	if (r < 0) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	} else if (r == 0) {
		DRM_ERROR("radeon: fence wait timed out.\n");
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return -ETIMEDOUT;
	}
	r = 0;
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3882 
3883 /*
3884  * CP.
 * On CIK, gfx and compute now have independent command processors.
3886  *
3887  * GFX
3888  * Gfx consists of a single ring and can process both gfx jobs and
3889  * compute jobs.  The gfx CP consists of three microengines (ME):
3890  * PFP - Pre-Fetch Parser
3891  * ME - Micro Engine
3892  * CE - Constant Engine
3893  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3895  * used by the DE so that they can be loaded into cache in parallel
3896  * while the DE is processing state update packets.
3897  *
3898  * Compute
3899  * The compute CP consists of two microengines (ME):
3900  * MEC1 - Compute MicroEngine 1
3901  * MEC2 - Compute MicroEngine 2
3902  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3903  * The queues are exposed to userspace and are programmed directly
3904  * by the compute runtime.
3905  */
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		/* GFX will no longer be able to copy; shrink the VRAM size
		 * TTM may actively use to the CPU-visible portion.
		 */
		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
	udelay(50);
}
3926 
3927 /**
3928  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3929  *
3930  * @rdev: radeon_device pointer
3931  *
3932  * Loads the gfx PFP, ME, and CE ucode.
3933  * Returns 0 for success, -EINVAL if the ucode is not available.
3934  */
3935 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3936 {
3937 	int i;
3938 
3939 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3940 		return -EINVAL;
3941 
3942 	cik_cp_gfx_enable(rdev, false);
3943 
3944 	if (rdev->new_fw) {
3945 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3946 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3947 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3948 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3949 		const struct gfx_firmware_header_v1_0 *me_hdr =
3950 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3951 		const __le32 *fw_data;
3952 		u32 fw_size;
3953 
3954 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3955 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3956 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3957 
3958 		/* PFP */
3959 		fw_data = (const __le32 *)
3960 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3961 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3962 		WREG32(CP_PFP_UCODE_ADDR, 0);
3963 		for (i = 0; i < fw_size; i++)
3964 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3965 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3966 
3967 		/* CE */
3968 		fw_data = (const __le32 *)
3969 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3970 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3971 		WREG32(CP_CE_UCODE_ADDR, 0);
3972 		for (i = 0; i < fw_size; i++)
3973 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3974 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3975 
3976 		/* ME */
3977 		fw_data = (const __be32 *)
3978 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3979 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3980 		WREG32(CP_ME_RAM_WADDR, 0);
3981 		for (i = 0; i < fw_size; i++)
3982 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3983 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3984 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3985 	} else {
3986 		const __be32 *fw_data;
3987 
3988 		/* PFP */
3989 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3990 		WREG32(CP_PFP_UCODE_ADDR, 0);
3991 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3992 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3993 		WREG32(CP_PFP_UCODE_ADDR, 0);
3994 
3995 		/* CE */
3996 		fw_data = (const __be32 *)rdev->ce_fw->data;
3997 		WREG32(CP_CE_UCODE_ADDR, 0);
3998 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3999 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4000 		WREG32(CP_CE_UCODE_ADDR, 0);
4001 
4002 		/* ME */
4003 		fw_data = (const __be32 *)rdev->me_fw->data;
4004 		WREG32(CP_ME_RAM_WADDR, 0);
4005 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4006 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4007 		WREG32(CP_ME_RAM_WADDR, 0);
4008 	}
4009 
4010 	return 0;
4011 }
4012 
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* cik_default_size dwords of clear state plus 17 for the packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the canned CIK clear state into the ring */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4073 
4074 /**
4075  * cik_cp_gfx_fini - stop the gfx ring
4076  *
4077  * @rdev: radeon_device pointer
4078  *
4079  * Stop the gfx ring and tear down the driver ring
4080  * info.
4081  */
4082 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4083 {
4084 	cik_cp_gfx_enable(rdev, false);
4085 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4086 }
4087 
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx can copy again: restore the full VRAM size for TTM */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4163 
4164 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4165 		     struct radeon_ring *ring)
4166 {
4167 	u32 rptr;
4168 
4169 	if (rdev->wb.enabled)
4170 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4171 	else
4172 		rptr = RREG32(CP_RB0_RPTR);
4173 
4174 	return rptr;
4175 }
4176 
4177 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4178 		     struct radeon_ring *ring)
4179 {
4180 	return RREG32(CP_RB0_WPTR);
4181 }
4182 
/* Program the gfx ring write pointer; the discarded read-back of the
 * same register is the usual MMIO posted-write flush idiom.
 */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	(void)RREG32(CP_RB0_WPTR);
}
4189 
4190 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4191 			 struct radeon_ring *ring)
4192 {
4193 	u32 rptr;
4194 
4195 	if (rdev->wb.enabled) {
4196 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4197 	} else {
4198 		mutex_lock(&rdev->srbm_mutex);
4199 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4200 		rptr = RREG32(CP_HQD_PQ_RPTR);
4201 		cik_srbm_select(rdev, 0, 0, 0, 0);
4202 		mutex_unlock(&rdev->srbm_mutex);
4203 	}
4204 
4205 	return rptr;
4206 }
4207 
4208 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4209 			 struct radeon_ring *ring)
4210 {
4211 	u32 wptr;
4212 
4213 	if (rdev->wb.enabled) {
4214 		/* XXX check if swapping is necessary on BE */
4215 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4216 	} else {
4217 		mutex_lock(&rdev->srbm_mutex);
4218 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4219 		wptr = RREG32(CP_HQD_PQ_WPTR);
4220 		cik_srbm_select(rdev, 0, 0, 0, 0);
4221 		mutex_unlock(&rdev->srbm_mutex);
4222 	}
4223 
4224 	return wptr;
4225 }
4226 
/* Publish the compute ring write pointer: mirror it into the write-back
 * buffer, then ring the queue's doorbell to notify the CP.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4234 
/* Quiesce one compute queue: stop wptr polling, request an HQD dequeue
 * and wait (up to usec_timeout) for it to go inactive, then clear the
 * queue pointers.  Caller holds srbm_mutex (see cik_cp_compute_enable).
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4259 
/**
 * cik_cp_compute_enable - enable/disable the compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the compute MEs.
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	udelay(50);
}
4288 
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* MECs must be halted while their ucode RAM is rewritten */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		if (rdev->family == CHIP_KAVERI) {
			/* NOTE(review): mec2_fw is dereferenced without a NULL
			 * check here — presumably guaranteed non-NULL by the
			 * firmware loader for KAVERI new_fw; verify.
			 */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy images are headerless big-endian dword streams */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 (legacy fw reuses the MEC1 image) */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4359 
4360 /**
4361  * cik_cp_compute_start - start the compute queues
4362  *
4363  * @rdev: radeon_device pointer
4364  *
4365  * Enable the compute queues.
4366  * Returns 0 for success, error for failure.
4367  */
4368 static int cik_cp_compute_start(struct radeon_device *rdev)
4369 {
4370 	cik_cp_compute_enable(rdev, true);
4371 
4372 	return 0;
4373 }
4374 
4375 /**
4376  * cik_cp_compute_fini - stop the compute queues
4377  *
4378  * @rdev: radeon_device pointer
4379  *
4380  * Stop the compute queues and tear down the driver queue
4381  * info.
4382  */
4383 static void cik_cp_compute_fini(struct radeon_device *rdev)
4384 {
4385 	int i, idx, r;
4386 
4387 	cik_cp_compute_enable(rdev, false);
4388 
4389 	for (i = 0; i < 2; i++) {
4390 		if (i == 0)
4391 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4392 		else
4393 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4394 
4395 		if (rdev->ring[idx].mqd_obj) {
4396 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4397 			if (unlikely(r != 0))
4398 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4399 
4400 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4401 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4402 
4403 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4404 			rdev->ring[idx].mqd_obj = NULL;
4405 		}
4406 	}
4407 }
4408 
4409 static void cik_mec_fini(struct radeon_device *rdev)
4410 {
4411 	int r;
4412 
4413 	if (rdev->mec.hpd_eop_obj) {
4414 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4415 		if (unlikely(r != 0))
4416 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4417 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4418 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4419 
4420 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4421 		rdev->mec.hpd_eop_obj = NULL;
4422 	}
4423 }
4424 
4425 #define MEC_HPD_SIZE 2048
4426 
/* Allocate, pin, and zero the HPD EOP buffer used by the compute MECs.
 * Returns 0 on success, negative error code on failure (buffer freed).
 * NOTE(review): the error messages below say "HDP EOP"; presumably
 * "HPD EOP" was meant, matching cik_mec_fini() — verify before changing.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* 2 * MEC_HPD_SIZE bytes per mec/pipe, pinned in GTT */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4481 
/*
 * CPU-side image of the CP_MQD_*/CP_HQD_* register block for one hardware
 * queue.  Each field mirrors the register of the same name; the struct is
 * embedded in struct bonaire_mqd (queue_state) so the CP can reload the
 * queue state from memory.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4520 
/*
 * Memory queue descriptor (MQD) layout for a CIK compute queue.  One of
 * these lives in a GTT buffer object per compute ring; its GPU address is
 * written to CP_MQD_BASE_ADDR in cik_cp_compute_resume().  queue_state
 * holds the saved HQD register image.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4548 
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Registers are programmed under srbm_mutex because cik_srbm_select()
 * switches the banked SRBM register window per me/pipe/queue/vmid.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	for (i = 0; i < rdev->mec.num_pipe; ++i) {
		cik_srbm_select(rdev, 0, i, 0, 0);

		/* each pipe gets its own MEC_HPD_SIZE*2 slice of the EOP bo */
		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= order_base_2(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);

	}
	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now (the CP1 and CP2 rings). */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* allocate the per-ring MQD bo once; reused across resumes */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all CUs for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* bank the HQD registers for this ring's me/pipe/queue */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait for the dequeue to complete */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address wether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		/* restore the default SRBM window before dropping the lock */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* ring must be marked ready before the test submits to it */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4790 
/**
 * cik_cp_enable - enable/disable both command processors
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to disable
 *
 * Convenience wrapper that toggles the gfx and compute CPs together.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4796 
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx microcode first, then the compute (MEC) microcode.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	ret = cik_cp_gfx_load_microcode(rdev);
	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
4810 
/**
 * cik_cp_fini - tear down both command processors
 *
 * @rdev: radeon_device pointer
 *
 * Convenience wrapper that tears down the gfx and compute CP state.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4816 
4817 static int cik_cp_resume(struct radeon_device *rdev)
4818 {
4819 	int r;
4820 
4821 	cik_enable_gui_idle_interrupt(rdev, false);
4822 
4823 	r = cik_cp_load_microcode(rdev);
4824 	if (r)
4825 		return r;
4826 
4827 	r = cik_cp_gfx_resume(rdev);
4828 	if (r)
4829 		return r;
4830 	r = cik_cp_compute_resume(rdev);
4831 	if (r)
4832 		return r;
4833 
4834 	cik_enable_gui_idle_interrupt(rdev, true);
4835 
4836 	return 0;
4837 }
4838 
/**
 * cik_print_gpu_status_regs - dump GPU status registers to the kernel log
 *
 * @rdev: radeon_device pointer
 *
 * Reads and prints the GRBM/SRBM/SDMA/CP status registers; used by the
 * reset path to aid debugging of GPU hangs.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4878 
4879 /**
4880  * cik_gpu_check_soft_reset - check which blocks are busy
4881  *
4882  * @rdev: radeon_device pointer
4883  *
4884  * Check which blocks are busy and return the relevant reset
4885  * mask to be used by cik_gpu_soft_reset().
4886  * Returns a mask of the blocks to be reset.
4887  */
4888 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4889 {
4890 	u32 reset_mask = 0;
4891 	u32 tmp;
4892 
4893 	/* GRBM_STATUS */
4894 	tmp = RREG32(GRBM_STATUS);
4895 	if (tmp & (PA_BUSY | SC_BUSY |
4896 		   BCI_BUSY | SX_BUSY |
4897 		   TA_BUSY | VGT_BUSY |
4898 		   DB_BUSY | CB_BUSY |
4899 		   GDS_BUSY | SPI_BUSY |
4900 		   IA_BUSY | IA_BUSY_NO_DMA))
4901 		reset_mask |= RADEON_RESET_GFX;
4902 
4903 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4904 		reset_mask |= RADEON_RESET_CP;
4905 
4906 	/* GRBM_STATUS2 */
4907 	tmp = RREG32(GRBM_STATUS2);
4908 	if (tmp & RLC_BUSY)
4909 		reset_mask |= RADEON_RESET_RLC;
4910 
4911 	/* SDMA0_STATUS_REG */
4912 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4913 	if (!(tmp & SDMA_IDLE))
4914 		reset_mask |= RADEON_RESET_DMA;
4915 
4916 	/* SDMA1_STATUS_REG */
4917 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4918 	if (!(tmp & SDMA_IDLE))
4919 		reset_mask |= RADEON_RESET_DMA1;
4920 
4921 	/* SRBM_STATUS2 */
4922 	tmp = RREG32(SRBM_STATUS2);
4923 	if (tmp & SDMA_BUSY)
4924 		reset_mask |= RADEON_RESET_DMA;
4925 
4926 	if (tmp & SDMA1_BUSY)
4927 		reset_mask |= RADEON_RESET_DMA1;
4928 
4929 	/* SRBM_STATUS */
4930 	tmp = RREG32(SRBM_STATUS);
4931 
4932 	if (tmp & IH_BUSY)
4933 		reset_mask |= RADEON_RESET_IH;
4934 
4935 	if (tmp & SEM_BUSY)
4936 		reset_mask |= RADEON_RESET_SEM;
4937 
4938 	if (tmp & GRBM_RQ_PENDING)
4939 		reset_mask |= RADEON_RESET_GRBM;
4940 
4941 	if (tmp & VMC_BUSY)
4942 		reset_mask |= RADEON_RESET_VMC;
4943 
4944 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4945 		   MCC_BUSY | MCD_BUSY))
4946 		reset_mask |= RADEON_RESET_MC;
4947 
4948 	if (evergreen_is_display_hung(rdev))
4949 		reset_mask |= RADEON_RESET_DISPLAY;
4950 
4951 	/* Skip MC reset as it's mostly likely not hung, just busy */
4952 	if (reset_mask & RADEON_RESET_MC) {
4953 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4954 		reset_mask &= ~RADEON_RESET_MC;
4955 	}
4956 
4957 	return reset_mask;
4958 }
4959 
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 * Halts the engines, stops MC access, pulses the GRBM/SRBM soft
 * reset bits, then restores MC access.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop memory traffic before touching the reset bits */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* no MC soft reset on IGPs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		/* read back to post the write before the delay */
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		/* read back to post the write before the delay */
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5090 
/* GMCON registers saved across a KV (IGP) pci config reset */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5096 
/**
 * kv_save_regs_for_reset - save GMCON registers before a KV reset
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register values
 *
 * Saves GMCON_RENG_EXECUTE/MISC/MISC3, then clears the remote-engine
 * execute and stutter bits so they stay quiet across the reset; the
 * saved values are restored by kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5108 
5109 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5110 				      struct kv_reset_save_regs *save)
5111 {
5112 	int i;
5113 
5114 	WREG32(GMCON_PGFSM_WRITE, 0);
5115 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5116 
5117 	for (i = 0; i < 5; i++)
5118 		WREG32(GMCON_PGFSM_WRITE, 0);
5119 
5120 	WREG32(GMCON_PGFSM_WRITE, 0);
5121 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5122 
5123 	for (i = 0; i < 5; i++)
5124 		WREG32(GMCON_PGFSM_WRITE, 0);
5125 
5126 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5127 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5128 
5129 	for (i = 0; i < 5; i++)
5130 		WREG32(GMCON_PGFSM_WRITE, 0);
5131 
5132 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5133 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5134 
5135 	for (i = 0; i < 5; i++)
5136 		WREG32(GMCON_PGFSM_WRITE, 0);
5137 
5138 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5139 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5140 
5141 	for (i = 0; i < 5; i++)
5142 		WREG32(GMCON_PGFSM_WRITE, 0);
5143 
5144 	WREG32(GMCON_PGFSM_WRITE, 0);
5145 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5146 
5147 	for (i = 0; i < 5; i++)
5148 		WREG32(GMCON_PGFSM_WRITE, 0);
5149 
5150 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5151 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5152 
5153 	for (i = 0; i < 5; i++)
5154 		WREG32(GMCON_PGFSM_WRITE, 0);
5155 
5156 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5157 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5158 
5159 	for (i = 0; i < 5; i++)
5160 		WREG32(GMCON_PGFSM_WRITE, 0);
5161 
5162 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5163 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5164 
5165 	for (i = 0; i < 5; i++)
5166 		WREG32(GMCON_PGFSM_WRITE, 0);
5167 
5168 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5169 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5170 
5171 	for (i = 0; i < 5; i++)
5172 		WREG32(GMCON_PGFSM_WRITE, 0);
5173 
5174 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5175 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5176 
5177 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5178 	WREG32(GMCON_MISC, save->gmcon_misc);
5179 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5180 }
5181 
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts all engines, quiesces the MC, then performs a full asic reset
 * through pci config space.  On IGPs (KV) the GMCON registers are
 * saved/restored around the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads all 1s while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5244 
/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @hard: force hard reset
 *
 * Look up which blocks are hung and attempt
 * to reset them.  Tries a soft reset first and escalates to a pci
 * config reset if blocks are still busy (and radeon_hard_reset is set).
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	if (hard) {
		cik_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* tell the BIOS the engine is hung so it reposts on next boot */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* clear the hung flag only if everything is idle again */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5285 
5286 /**
5287  * cik_gfx_is_lockup - check if the 3D engine is locked up
5288  *
5289  * @rdev: radeon_device pointer
5290  * @ring: radeon_ring structure holding ring information
5291  *
5292  * Check if the 3D engine is locked up (CIK).
5293  * Returns true if the engine is locked, false if not.
5294  */
5295 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5296 {
5297 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5298 
5299 	if (!(reset_mask & (RADEON_RESET_GFX |
5300 			    RADEON_RESET_COMPUTE |
5301 			    RADEON_RESET_CP))) {
5302 		radeon_ring_lockup_update(rdev, ring);
5303 		return false;
5304 	}
5305 	return radeon_ring_test_lockup(rdev, ring);
5306 }
5307 
5308 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).  MC access is stopped while the
 * apertures are reprogrammed.
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	/* unmapped system-aperture accesses hit the dummy scratch page */
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP unused: base/top/bot left at the disabled defaults */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5364 
5365 /**
5366  * cik_mc_init - initialize the memory controller driver params
5367  *
5368  * @rdev: radeon_device pointer
5369  *
5370  * Look up the amount of vram, vram width, and decide how to place
5371  * vram and gart within the GPU's physical address space (CIK).
5372  * Returns 0 for success.
5373  */
5374 static int cik_mc_init(struct radeon_device *rdev)
5375 {
5376 	u32 tmp;
5377 	int chansize, numchan;
5378 
5379 	/* Get VRAM informations */
5380 	rdev->mc.vram_is_ddr = true;
5381 	tmp = RREG32(MC_ARB_RAMCFG);
5382 	if (tmp & CHANSIZE_MASK) {
5383 		chansize = 64;
5384 	} else {
5385 		chansize = 32;
5386 	}
5387 	tmp = RREG32(MC_SHARED_CHMAP);
5388 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5389 	case 0:
5390 	default:
5391 		numchan = 1;
5392 		break;
5393 	case 1:
5394 		numchan = 2;
5395 		break;
5396 	case 2:
5397 		numchan = 4;
5398 		break;
5399 	case 3:
5400 		numchan = 8;
5401 		break;
5402 	case 4:
5403 		numchan = 3;
5404 		break;
5405 	case 5:
5406 		numchan = 6;
5407 		break;
5408 	case 6:
5409 		numchan = 10;
5410 		break;
5411 	case 7:
5412 		numchan = 12;
5413 		break;
5414 	case 8:
5415 		numchan = 16;
5416 		break;
5417 	}
5418 	rdev->mc.vram_width = numchan * chansize;
5419 	/* Could aper size report 0 ? */
5420 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5421 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5422 	/* size in MB on si */
5423 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5424 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5425 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5426 	si_vram_gtt_location(rdev, &rdev->mc);
5427 	radeon_update_bandwidth_info(rdev);
5428 
5429 	return 0;
5430 }
5431 
5432 /*
5433  * GART
5434  * VMID 0 is the physical GPU addresses as used by the kernel.
5435  * VMIDs 1-15 are used for userspace clients and are handled
5436  * by the radeon vm/hsa code.
5437  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only context 0 is flushed here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5453 
/**
 * cik_pcie_init_compute_vmid - set up SH_MEM state for the compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Programs SH_MEM_CONFIG/BASES and the APE1 aperture for VMIDs 8-15,
 * which are the VMIDs handed over to KFD for compute.
 * NOTE(review): 0x6000 appears to be the shared/private aperture base
 * in both halves of SH_MEM_BASES — confirm against the KFD code.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	/* cik_srbm_select banks the SH_MEM registers per VMID */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5475 
5476 /**
5477  * cik_pcie_gart_enable - gart enable
5478  *
5479  * @rdev: radeon_device pointer
5480  *
5481  * This sets up the TLBs, programs the page tables for VMID0,
5482  * sets up the hw for VMIDs 1-15 which are allocated on
5483  * demand, and sets up the global locations for the LDS, GDS,
5484  * and GPUVM for FSA64 clients (CIK).
5485  * Returns 0 for success, errors for failure.
5486  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	/* pin the page table BO in VRAM so the MC can reach it */
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: the kernel GART mapping; faults on context0
	 * are redirected to the dummy page
	 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented register offsets, cleared here --
	 * meaning unknown from this file
	 */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* restore the per-VMID page table bases saved by
	 * cik_pcie_gart_disable()
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* Kaveri only: make sure the hub does not bypass the VM */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	/* program the compute-reserved VMIDs (8-15) */
	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5598 
5599 /**
5600  * cik_pcie_gart_disable - gart disable
5601  *
5602  * @rdev: radeon_device pointer
5603  *
5604  * This disables all VM page table (CIK).
5605  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VMID page table bases so cik_pcie_gart_enable()
	 * can restore them after reset/resume
	 */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* unpin the table BO now that the hw no longer references it */
	radeon_gart_table_vram_unpin(rdev);
}
5637 
5638 /**
5639  * cik_pcie_gart_fini - vm fini callback
5640  *
5641  * @rdev: radeon_device pointer
5642  *
5643  * Tears down the driver GART/VM setup (CIK).
5644  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* order matters: quiesce the hw first, then free the table BO,
	 * then the gart bookkeeping
	 */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5651 
5652 /* vm parser */
5653 /**
5654  * cik_ib_parse - vm ib_parse callback
5655  *
5656  * @rdev: radeon_device pointer
5657  * @ib: indirect buffer pointer
5658  *
5659  * CIK uses hw IB checking so this is a nop (CIK).
5660  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* CIK relies on hardware IB validation; nothing to do here */
	return 0;
}
5665 
5666 /*
5667  * vm
5668  * VMID 0 is the physical GPU addresses as used by the kernel.
5669  * VMIDs 1-15 are used for userspace clients and are handled
5670  * by the radeon vm/hsa code.
5671  */
5672 /**
5673  * cik_vm_init - cik vm init callback
5674  *
5675  * @rdev: radeon_device pointer
5676  *
5677  * Inits cik specific vm parameters (number of VMs, base of vram for
5678  * VMIDs 1-15) (CIK).
5679  * Returns 0 for success.
5680  */
5681 int cik_vm_init(struct radeon_device *rdev)
5682 {
5683 	/*
5684 	 * number of VMs
5685 	 * VMID 0 is reserved for System
5686 	 * radeon graphics/compute will use VMIDs 1-15
5687 	 */
5688 	rdev->vm_manager.nvm = 16;
5689 	/* base offset of vram pages */
5690 	if (rdev->flags & RADEON_IS_IGP) {
5691 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5692 		tmp <<= 22;
5693 		rdev->vm_manager.vram_base_offset = tmp;
5694 	} else
5695 		rdev->vm_manager.vram_base_offset = 0;
5696 
5697 	return 0;
5698 }
5699 
5700 /**
5701  * cik_vm_fini - cik vm fini callback
5702  *
5703  * @rdev: radeon_device pointer
5704  *
5705  * Tear down any asic specific VM setup (CIK).
5706  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* no asic specific VM state to tear down on CIK */
}
5710 
5711 /**
5712  * cik_vm_decode_fault - print human readable fault info
5713  *
5714  * @rdev: radeon_device pointer
5715  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5716  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5717  *
5718  * Print human readable fault information (CIK).
5719  */
5720 static void cik_vm_decode_fault(struct radeon_device *rdev,
5721 				u32 status, u32 addr, u32 mc_client)
5722 {
5723 	u32 mc_id;
5724 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5725 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5726 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5727 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5728 
5729 	if (rdev->family == CHIP_HAWAII)
5730 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5731 	else
5732 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5733 
5734 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5735 	       protections, vmid, addr,
5736 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5737 	       block, mc_client, mc_id);
5738 }
5739 
5740 /**
5741  * cik_vm_flush - cik vm flush using the CP
5742  *
5743  * @rdev: radeon_device pointer
5744  *
5745  * Update the page table base and flush the VM TLB
5746  * using the CP (CIK).
5747  */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the gfx ring may use the PFP engine for these writes */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page directory base address for this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* first select the SRBM state for this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	/* four consecutive SH_MEM registers written in one packet */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM state back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5821 
5822 /*
5823  * RLC
5824  * The RLC is a multi-purpose microengine that handles a
5825  * variety of functions, the most important of which is
5826  * the interrupt controller.
5827  */
5828 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5829 					  bool enable)
5830 {
5831 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5832 
5833 	if (enable)
5834 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5835 	else
5836 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5837 	WREG32(CP_INT_CNTL_RING0, tmp);
5838 }
5839 
5840 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5841 {
5842 	u32 tmp;
5843 
5844 	tmp = RREG32(RLC_LB_CNTL);
5845 	if (enable)
5846 		tmp |= LOAD_BALANCE_ENABLE;
5847 	else
5848 		tmp &= ~LOAD_BALANCE_ENABLE;
5849 	WREG32(RLC_LB_CNTL, tmp);
5850 }
5851 
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* per-SE/SH: wait for the CU master serdes to go idle */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast access to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* then wait for the global (non-CU) masters to go idle */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5878 
5879 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5880 {
5881 	u32 tmp;
5882 
5883 	tmp = RREG32(RLC_CNTL);
5884 	if (tmp != rlc)
5885 		WREG32(RLC_CNTL, rlc);
5886 }
5887 
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		/* clear the enable bit, then wait for the GPM to drain */
		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	/* return the original RLC_CNTL so the caller can restore it
	 * later via cik_update_rlc()
	 */
	return orig;
}
5911 
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* request safe mode from the RLC */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for gfx power and clock status to report ready */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to ack the request (REQ bit cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5932 
5933 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5934 {
5935 	u32 tmp;
5936 
5937 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5938 	WREG32(RLC_GPR_REG2, tmp);
5939 }
5940 
5941 /**
5942  * cik_rlc_stop - stop the RLC ME
5943  *
5944  * @rdev: radeon_device pointer
5945  *
5946  * Halt the RLC ME (MicroEngine) (CIK).
5947  */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* clear the RLC enable bit */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* wait until all serdes masters report idle before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5956 
5957 /**
5958  * cik_rlc_start - start the RLC ME
5959  *
5960  * @rdev: radeon_device pointer
5961  *
5962  * Unhalt the RLC ME (MicroEngine) (CIK).
5963  */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC some time to come up */
	udelay(50);
}
5972 
5973 /**
5974  * cik_rlc_resume - setup the RLC hw
5975  *
5976  * @rdev: radeon_device pointer
5977  *
5978  * Initialize the RLC registers, load the ucode,
5979  * and start the RLC (CIK).
5980  * Returns 0 for success, -EINVAL if the ucode is not available.
5981  */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	/* load balancing counter setup */
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* broadcast the LB params to all SEs/SHs */
	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);
	mutex_unlock(&rdev->grbm_idx_mutex);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian image with a header
		 * giving the ucode offset and size
		 */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: big-endian image, fixed per-family size */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6064 
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming the serdes; the saved
		 * RLC_CNTL value is restored via cik_update_rlc() below
		 */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): results discarded -- presumably posting
		 * reads to settle the CB clock before disabling coarse
		 * gating; confirm against hw docs
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6102 
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt RLC, program the serdes override, restore RLC */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* disable path: force all overrides on and turn off
		 * RLC/CP memory light sleep
		 */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* same halt/serdes/restore dance as the enable path,
		 * but with the MGCG_OVERRIDE_1 pattern
		 */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6186 
/* Memory-controller/hub clock gating control registers; each carries
 * the MC_LS_ENABLE / MC_CG_ENABLE bits toggled by cik_enable_mc_ls()
 * and cik_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6199 
6200 static void cik_enable_mc_ls(struct radeon_device *rdev,
6201 			     bool enable)
6202 {
6203 	int i;
6204 	u32 orig, data;
6205 
6206 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6207 		orig = data = RREG32(mc_cg_registers[i]);
6208 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6209 			data |= MC_LS_ENABLE;
6210 		else
6211 			data &= ~MC_LS_ENABLE;
6212 		if (data != orig)
6213 			WREG32(mc_cg_registers[i], data);
6214 	}
6215 }
6216 
6217 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6218 			       bool enable)
6219 {
6220 	int i;
6221 	u32 orig, data;
6222 
6223 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6224 		orig = data = RREG32(mc_cg_registers[i]);
6225 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6226 			data |= MC_CG_ENABLE;
6227 		else
6228 			data &= ~MC_CG_ENABLE;
6229 		if (data != orig)
6230 			WREG32(mc_cg_registers[i], data);
6231 	}
6232 }
6233 
6234 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6235 				 bool enable)
6236 {
6237 	u32 orig, data;
6238 
6239 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6240 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6241 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6242 	} else {
6243 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6244 		data |= 0xff000000;
6245 		if (data != orig)
6246 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6247 
6248 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6249 		data |= 0xff000000;
6250 		if (data != orig)
6251 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6252 	}
6253 }
6254 
6255 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6256 				 bool enable)
6257 {
6258 	u32 orig, data;
6259 
6260 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6261 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6262 		data |= 0x100;
6263 		if (orig != data)
6264 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6265 
6266 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6267 		data |= 0x100;
6268 		if (orig != data)
6269 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6270 	} else {
6271 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6272 		data &= ~0x100;
6273 		if (orig != data)
6274 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6275 
6276 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6277 		data &= ~0x100;
6278 		if (orig != data)
6279 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6280 	}
6281 }
6282 
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value just read is discarded and the
		 * register forced to 0xfff; presumably the read is only
		 * needed to latch the indirect context reg -- confirm
		 */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		/* enable dynamic clocking */
		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the mem gating bits and the dynamic clock mode */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6308 
6309 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6310 			       bool enable)
6311 {
6312 	u32 orig, data;
6313 
6314 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6315 
6316 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6317 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6318 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6319 	else
6320 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6321 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6322 
6323 	if (orig != data)
6324 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6325 }
6326 
6327 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6328 				bool enable)
6329 {
6330 	u32 orig, data;
6331 
6332 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6333 
6334 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6335 		data &= ~CLOCK_GATING_DIS;
6336 	else
6337 		data |= CLOCK_GATING_DIS;
6338 
6339 	if (orig != data)
6340 		WREG32(HDP_HOST_PATH_CNTL, data);
6341 }
6342 
6343 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6344 			      bool enable)
6345 {
6346 	u32 orig, data;
6347 
6348 	orig = data = RREG32(HDP_MEM_POWER_LS);
6349 
6350 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6351 		data |= HDP_LS_ENABLE;
6352 	else
6353 		data &= ~HDP_LS_ENABLE;
6354 
6355 	if (orig != data)
6356 		WREG32(HDP_MEM_POWER_LS, data);
6357 }
6358 
/* Enable/disable clock gating for the requested set of IP blocks.
 * @block is a bitmask of RADEON_CG_BLOCK_* values.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* mask the gui-idle irq while reprogramming gfx CG */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC CG registers are not programmed on APUs (IGP) */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6406 
static void cik_init_cg(struct radeon_device *rdev)
{

	/* gfx CG first, then the remaining blocks in one batch */
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6421 
static void cik_fini_cg(struct radeon_device *rdev)
{
	/* reverse of cik_init_cg(): non-gfx blocks first, gfx last */
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6432 
6433 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6434 					  bool enable)
6435 {
6436 	u32 data, orig;
6437 
6438 	orig = data = RREG32(RLC_PG_CNTL);
6439 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6440 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6441 	else
6442 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6443 	if (orig != data)
6444 		WREG32(RLC_PG_CNTL, data);
6445 }
6446 
6447 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6448 					  bool enable)
6449 {
6450 	u32 data, orig;
6451 
6452 	orig = data = RREG32(RLC_PG_CNTL);
6453 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6454 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6455 	else
6456 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6457 	if (orig != data)
6458 		WREG32(RLC_PG_CNTL, data);
6459 }
6460 
6461 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6462 {
6463 	u32 data, orig;
6464 
6465 	orig = data = RREG32(RLC_PG_CNTL);
6466 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6467 		data &= ~DISABLE_CP_PG;
6468 	else
6469 		data |= DISABLE_CP_PG;
6470 	if (orig != data)
6471 		WREG32(RLC_PG_CNTL, data);
6472 }
6473 
6474 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6475 {
6476 	u32 data, orig;
6477 
6478 	orig = data = RREG32(RLC_PG_CNTL);
6479 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6480 		data &= ~DISABLE_GDS_PG;
6481 	else
6482 		data |= DISABLE_GDS_PG;
6483 	if (orig != data)
6484 		WREG32(RLC_PG_CNTL, data);
6485 }
6486 
/* Size and offsets (in dwords) of the RLC jump tables inside the
 * legacy (pre-"new_fw") CP ucode images, used by cik_init_cp_pg_table().
 */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6490 
6491 void cik_init_cp_pg_table(struct radeon_device *rdev)
6492 {
6493 	volatile u32 *dst_ptr;
6494 	int me, i, max_me = 4;
6495 	u32 bo_offset = 0;
6496 	u32 table_offset, table_size;
6497 
6498 	if (rdev->family == CHIP_KAVERI)
6499 		max_me = 5;
6500 
6501 	if (rdev->rlc.cp_table_ptr == NULL)
6502 		return;
6503 
6504 	/* write the cp table buffer */
6505 	dst_ptr = rdev->rlc.cp_table_ptr;
6506 	for (me = 0; me < max_me; me++) {
6507 		if (rdev->new_fw) {
6508 			const __le32 *fw_data;
6509 			const struct gfx_firmware_header_v1_0 *hdr;
6510 
6511 			if (me == 0) {
6512 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6513 				fw_data = (const __le32 *)
6514 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6515 				table_offset = le32_to_cpu(hdr->jt_offset);
6516 				table_size = le32_to_cpu(hdr->jt_size);
6517 			} else if (me == 1) {
6518 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6519 				fw_data = (const __le32 *)
6520 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6521 				table_offset = le32_to_cpu(hdr->jt_offset);
6522 				table_size = le32_to_cpu(hdr->jt_size);
6523 			} else if (me == 2) {
6524 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6525 				fw_data = (const __le32 *)
6526 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6527 				table_offset = le32_to_cpu(hdr->jt_offset);
6528 				table_size = le32_to_cpu(hdr->jt_size);
6529 			} else if (me == 3) {
6530 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6531 				fw_data = (const __le32 *)
6532 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6533 				table_offset = le32_to_cpu(hdr->jt_offset);
6534 				table_size = le32_to_cpu(hdr->jt_size);
6535 			} else {
6536 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6537 				fw_data = (const __le32 *)
6538 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6539 				table_offset = le32_to_cpu(hdr->jt_offset);
6540 				table_size = le32_to_cpu(hdr->jt_size);
6541 			}
6542 
6543 			for (i = 0; i < table_size; i ++) {
6544 				dst_ptr[bo_offset + i] =
6545 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6546 			}
6547 			bo_offset += table_size;
6548 		} else {
6549 			const __be32 *fw_data;
6550 			table_size = CP_ME_TABLE_SIZE;
6551 
6552 			if (me == 0) {
6553 				fw_data = (const __be32 *)rdev->ce_fw->data;
6554 				table_offset = CP_ME_TABLE_OFFSET;
6555 			} else if (me == 1) {
6556 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6557 				table_offset = CP_ME_TABLE_OFFSET;
6558 			} else if (me == 2) {
6559 				fw_data = (const __be32 *)rdev->me_fw->data;
6560 				table_offset = CP_ME_TABLE_OFFSET;
6561 			} else {
6562 				fw_data = (const __be32 *)rdev->mec_fw->data;
6563 				table_offset = CP_MEC_TABLE_OFFSET;
6564 			}
6565 
6566 			for (i = 0; i < table_size; i ++) {
6567 				dst_ptr[bo_offset + i] =
6568 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6569 			}
6570 			bo_offset += table_size;
6571 		}
6572 	}
6573 }
6574 
6575 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6576 				bool enable)
6577 {
6578 	u32 data, orig;
6579 
6580 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6581 		orig = data = RREG32(RLC_PG_CNTL);
6582 		data |= GFX_PG_ENABLE;
6583 		if (orig != data)
6584 			WREG32(RLC_PG_CNTL, data);
6585 
6586 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6587 		data |= AUTO_PG_EN;
6588 		if (orig != data)
6589 			WREG32(RLC_AUTO_PG_CTRL, data);
6590 	} else {
6591 		orig = data = RREG32(RLC_PG_CNTL);
6592 		data &= ~GFX_PG_ENABLE;
6593 		if (orig != data)
6594 			WREG32(RLC_PG_CNTL, data);
6595 
6596 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6597 		data &= ~AUTO_PG_EN;
6598 		if (orig != data)
6599 			WREG32(RLC_AUTO_PG_CTRL, data);
6600 
6601 		data = RREG32(DB_RENDER_CONTROL);
6602 	}
6603 }
6604 
6605 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6606 {
6607 	u32 mask = 0, tmp, tmp1;
6608 	int i;
6609 
6610 	mutex_lock(&rdev->grbm_idx_mutex);
6611 	cik_select_se_sh(rdev, se, sh);
6612 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6613 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6614 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6615 	mutex_unlock(&rdev->grbm_idx_mutex);
6616 
6617 	tmp &= 0xffff0000;
6618 
6619 	tmp |= tmp1;
6620 	tmp >>= 16;
6621 
6622 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6623 		mask <<= 1;
6624 		mask |= 1;
6625 	}
6626 
6627 	return (~tmp) & mask;
6628 }
6629 
6630 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6631 {
6632 	u32 i, j, k, active_cu_number = 0;
6633 	u32 mask, counter, cu_bitmap;
6634 	u32 tmp = 0;
6635 
6636 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6637 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6638 			mask = 1;
6639 			cu_bitmap = 0;
6640 			counter = 0;
6641 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6642 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6643 					if (counter < 2)
6644 						cu_bitmap |= mask;
6645 					counter ++;
6646 				}
6647 				mask <<= 1;
6648 			}
6649 
6650 			active_cu_number += counter;
6651 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6652 		}
6653 	}
6654 
6655 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6656 
6657 	tmp = RREG32(RLC_MAX_PG_CU);
6658 	tmp &= ~MAX_PU_CU_MASK;
6659 	tmp |= MAX_PU_CU(active_cu_number);
6660 	WREG32(RLC_MAX_PG_CU, tmp);
6661 }
6662 
6663 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6664 				       bool enable)
6665 {
6666 	u32 data, orig;
6667 
6668 	orig = data = RREG32(RLC_PG_CNTL);
6669 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6670 		data |= STATIC_PER_CU_PG_ENABLE;
6671 	else
6672 		data &= ~STATIC_PER_CU_PG_ENABLE;
6673 	if (orig != data)
6674 		WREG32(RLC_PG_CNTL, data);
6675 }
6676 
6677 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6678 					bool enable)
6679 {
6680 	u32 data, orig;
6681 
6682 	orig = data = RREG32(RLC_PG_CNTL);
6683 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6684 		data |= DYN_PER_CU_PG_ENABLE;
6685 	else
6686 		data &= ~DYN_PER_CU_PG_ENABLE;
6687 	if (orig != data)
6688 		WREG32(RLC_PG_CNTL, data);
6689 }
6690 
6691 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6692 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6693 
/* Set up the RLC scratch state used by gfx coarse-grain powergating:
 * clear-state descriptor, save/restore register list, table base
 * addresses and the various PG delay/idle-poll tunables. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* publish the clear-state buffer (hi addr, lo addr, size)
		 * through the RLC scratch interface */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero the three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* base addresses are programmed in units of 256 bytes (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	/* PG delay tunables; values match the hardware defaults used by
	 * this driver elsewhere — do not change without hw guidance */
	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6742 
/* Toggle all gfx powergating features together: coarse-grain PG plus
 * static and dynamic per-CU medium-grain PG.  Each helper checks its
 * own pg_flags gate, so this is safe to call unconditionally. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6749 
6750 u32 cik_get_csb_size(struct radeon_device *rdev)
6751 {
6752 	u32 count = 0;
6753 	const struct cs_section_def *sect = NULL;
6754 	const struct cs_extent_def *ext = NULL;
6755 
6756 	if (rdev->rlc.cs_data == NULL)
6757 		return 0;
6758 
6759 	/* begin clear state */
6760 	count += 2;
6761 	/* context control state */
6762 	count += 3;
6763 
6764 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6765 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6766 			if (sect->id == SECT_CONTEXT)
6767 				count += 2 + ext->reg_count;
6768 			else
6769 				return 0;
6770 		}
6771 	}
6772 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6773 	count += 4;
6774 	/* end clear state */
6775 	count += 2;
6776 	/* clear state */
6777 	count += 2;
6778 
6779 	return count;
6780 }
6781 
6782 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6783 {
6784 	u32 count = 0, i;
6785 	const struct cs_section_def *sect = NULL;
6786 	const struct cs_extent_def *ext = NULL;
6787 
6788 	if (rdev->rlc.cs_data == NULL)
6789 		return;
6790 	if (buffer == NULL)
6791 		return;
6792 
6793 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6794 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6795 
6796 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6797 	buffer[count++] = cpu_to_le32(0x80000000);
6798 	buffer[count++] = cpu_to_le32(0x80000000);
6799 
6800 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6801 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6802 			if (sect->id == SECT_CONTEXT) {
6803 				buffer[count++] =
6804 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6805 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6806 				for (i = 0; i < ext->reg_count; i++)
6807 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6808 			} else {
6809 				return;
6810 			}
6811 		}
6812 	}
6813 
6814 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6815 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6816 	switch (rdev->family) {
6817 	case CHIP_BONAIRE:
6818 		buffer[count++] = cpu_to_le32(0x16000012);
6819 		buffer[count++] = cpu_to_le32(0x00000000);
6820 		break;
6821 	case CHIP_KAVERI:
6822 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6823 		buffer[count++] = cpu_to_le32(0x00000000);
6824 		break;
6825 	case CHIP_KABINI:
6826 	case CHIP_MULLINS:
6827 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6828 		buffer[count++] = cpu_to_le32(0x00000000);
6829 		break;
6830 	case CHIP_HAWAII:
6831 		buffer[count++] = cpu_to_le32(0x3a00161a);
6832 		buffer[count++] = cpu_to_le32(0x0000002e);
6833 		break;
6834 	default:
6835 		buffer[count++] = cpu_to_le32(0x00000000);
6836 		buffer[count++] = cpu_to_le32(0x00000000);
6837 		break;
6838 	}
6839 
6840 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6841 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6842 
6843 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6844 	buffer[count++] = cpu_to_le32(0);
6845 }
6846 
/* Bring up powergating at init time.  Ordering matters: the sclk
 * slowdown and gfx-PG scratch setup must be in place before CP/GDS PG
 * and the final PG enable. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			/* gfx PG needs the RLC scratch state and the
			 * CP/GDS PG features set up first */
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6861 
/* Tear down powergating: disable gfx PG first, then the CP/GDS PG
 * features it depended on (reverse of cik_init_pg()). */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6872 
6873 /*
6874  * Interrupts
6875  * Starting with r6xx, interrupts are handled via a ring buffer.
6876  * Ring buffers are areas of GPU accessible memory that the GPU
6877  * writes interrupt vectors into and the host reads vectors out of.
6878  * There is a rptr (read pointer) that determines where the
6879  * host is currently reading, and a wptr (write pointer)
6880  * which determines where the GPU has written.  When the
6881  * pointers are equal, the ring is idle.  When the GPU
6882  * writes vectors to the ring buffer, it increments the
6883  * wptr.  When there is an interrupt, the host then starts
6884  * fetching commands and processing them until the pointers are
6885  * equal again at which point it updates the rptr.
6886  */
6887 
6888 /**
6889  * cik_enable_interrupts - Enable the interrupt ring buffer
6890  *
6891  * @rdev: radeon_device pointer
6892  *
6893  * Enable the interrupt ring buffer (CIK).
6894  */
6895 static void cik_enable_interrupts(struct radeon_device *rdev)
6896 {
6897 	u32 ih_cntl = RREG32(IH_CNTL);
6898 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6899 
6900 	ih_cntl |= ENABLE_INTR;
6901 	ih_rb_cntl |= IH_RB_ENABLE;
6902 	WREG32(IH_CNTL, ih_cntl);
6903 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6904 	rdev->ih.enabled = true;
6905 }
6906 
6907 /**
6908  * cik_disable_interrupts - Disable the interrupt ring buffer
6909  *
6910  * @rdev: radeon_device pointer
6911  *
6912  * Disable the interrupt ring buffer (CIK).
6913  */
6914 static void cik_disable_interrupts(struct radeon_device *rdev)
6915 {
6916 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6917 	u32 ih_cntl = RREG32(IH_CNTL);
6918 
6919 	ih_rb_cntl &= ~IH_RB_ENABLE;
6920 	ih_cntl &= ~ENABLE_INTR;
6921 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6922 	WREG32(IH_CNTL, ih_cntl);
6923 	/* set rptr, wptr to 0 */
6924 	WREG32(IH_RB_RPTR, 0);
6925 	WREG32(IH_RB_WPTR, 0);
6926 	rdev->ih.enabled = false;
6927 	rdev->ih.rptr = 0;
6928 }
6929 
6930 /**
6931  * cik_disable_interrupt_state - Disable all interrupt sources
6932  *
6933  * @rdev: radeon_device pointer
6934  *
6935  * Clear all interrupt enable bits used by the driver (CIK).
6936  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	/* keep only the context busy/empty enables; clear everything else */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	/* keep the configured HPD polarity while clearing the enables */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7006 
7007 /**
7008  * cik_irq_init - init and enable the interrupt ring
7009  *
7010  * @rdev: radeon_device pointer
7011  *
7012  * Allocate a ring buffer for the interrupt controller,
7013  * enable the RLC, disable interrupts, enable the IH
7014  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7016  * Returns 0 for success, errors for failure.
7017  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* free the IH ring allocated above on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is in 256-byte units */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7088 
7089 /**
7090  * cik_irq_set - enable/disable interrupt sources
7091  *
7092  * @rdev: radeon_device pointer
7093  *
7094  * Enable interrupt sources on the GPU (vblanks, hpd,
7095  * etc.) (CIK).
7096  * Returns 0 for success, errors for failure.
7097  */
7098 int cik_irq_set(struct radeon_device *rdev)
7099 {
7100 	u32 cp_int_cntl;
7101 	u32 cp_m1p0;
7102 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7103 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7104 	u32 grbm_int_cntl = 0;
7105 	u32 dma_cntl, dma_cntl1;
7106 
7107 	if (!rdev->irq.installed) {
7108 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7109 		return -EINVAL;
7110 	}
7111 	/* don't enable anything if the ih is disabled */
7112 	if (!rdev->ih.enabled) {
7113 		cik_disable_interrupts(rdev);
7114 		/* force the active interrupt state to all disabled */
7115 		cik_disable_interrupt_state(rdev);
7116 		return 0;
7117 	}
7118 
7119 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7120 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7121 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7122 
7123 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7124 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7125 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7126 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7127 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7128 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7129 
7130 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7131 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7132 
7133 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7134 
7135 	/* enable CP interrupts on all rings */
7136 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7137 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7138 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7139 	}
7140 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7141 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7142 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7143 		if (ring->me == 1) {
7144 			switch (ring->pipe) {
7145 			case 0:
7146 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7147 				break;
7148 			default:
7149 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7150 				break;
7151 			}
7152 		} else {
7153 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7154 		}
7155 	}
7156 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7157 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7158 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7159 		if (ring->me == 1) {
7160 			switch (ring->pipe) {
7161 			case 0:
7162 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7163 				break;
7164 			default:
7165 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7166 				break;
7167 			}
7168 		} else {
7169 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7170 		}
7171 	}
7172 
7173 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7174 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7175 		dma_cntl |= TRAP_ENABLE;
7176 	}
7177 
7178 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7179 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7180 		dma_cntl1 |= TRAP_ENABLE;
7181 	}
7182 
7183 	if (rdev->irq.crtc_vblank_int[0] ||
7184 	    atomic_read(&rdev->irq.pflip[0])) {
7185 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7186 		crtc1 |= VBLANK_INTERRUPT_MASK;
7187 	}
7188 	if (rdev->irq.crtc_vblank_int[1] ||
7189 	    atomic_read(&rdev->irq.pflip[1])) {
7190 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7191 		crtc2 |= VBLANK_INTERRUPT_MASK;
7192 	}
7193 	if (rdev->irq.crtc_vblank_int[2] ||
7194 	    atomic_read(&rdev->irq.pflip[2])) {
7195 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7196 		crtc3 |= VBLANK_INTERRUPT_MASK;
7197 	}
7198 	if (rdev->irq.crtc_vblank_int[3] ||
7199 	    atomic_read(&rdev->irq.pflip[3])) {
7200 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7201 		crtc4 |= VBLANK_INTERRUPT_MASK;
7202 	}
7203 	if (rdev->irq.crtc_vblank_int[4] ||
7204 	    atomic_read(&rdev->irq.pflip[4])) {
7205 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7206 		crtc5 |= VBLANK_INTERRUPT_MASK;
7207 	}
7208 	if (rdev->irq.crtc_vblank_int[5] ||
7209 	    atomic_read(&rdev->irq.pflip[5])) {
7210 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7211 		crtc6 |= VBLANK_INTERRUPT_MASK;
7212 	}
7213 	if (rdev->irq.hpd[0]) {
7214 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7215 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7216 	}
7217 	if (rdev->irq.hpd[1]) {
7218 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7219 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7220 	}
7221 	if (rdev->irq.hpd[2]) {
7222 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7223 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7224 	}
7225 	if (rdev->irq.hpd[3]) {
7226 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7227 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7228 	}
7229 	if (rdev->irq.hpd[4]) {
7230 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7231 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7232 	}
7233 	if (rdev->irq.hpd[5]) {
7234 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7235 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7236 	}
7237 
7238 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7239 
7240 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7241 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7242 
7243 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7244 
7245 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7246 
7247 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7248 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7249 	if (rdev->num_crtc >= 4) {
7250 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7251 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7252 	}
7253 	if (rdev->num_crtc >= 6) {
7254 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7255 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7256 	}
7257 
7258 	if (rdev->num_crtc >= 2) {
7259 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7260 		       GRPH_PFLIP_INT_MASK);
7261 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7262 		       GRPH_PFLIP_INT_MASK);
7263 	}
7264 	if (rdev->num_crtc >= 4) {
7265 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7266 		       GRPH_PFLIP_INT_MASK);
7267 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7268 		       GRPH_PFLIP_INT_MASK);
7269 	}
7270 	if (rdev->num_crtc >= 6) {
7271 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7272 		       GRPH_PFLIP_INT_MASK);
7273 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7274 		       GRPH_PFLIP_INT_MASK);
7275 	}
7276 
7277 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7278 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7279 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7280 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7281 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7282 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7283 
7284 	/* posting read */
7285 	RREG32(SRBM_STATUS);
7286 
7287 	return 0;
7288 }
7289 
7290 /**
7291  * cik_irq_ack - ack interrupt sources
7292  *
7293  * @rdev: radeon_device pointer
7294  *
7295  * Ack interrupt sources on the GPU (vblanks, hpd,
7296  * etc.) (CIK).  Certain interrupts sources are sw
7297  * generated and do not require an explicit ack.
7298  */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* latch all display interrupt status registers for later use by
	 * the IRQ handler */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	/* page-flip status per crtc */
	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* ack pending pflip / vblank / vline interrupts on crtc 0/1 */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* same for crtc 2/3 when present */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* same for crtc 4/5 when present */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack hpd connect/disconnect interrupts */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	/* ack hpd RX (e.g. DP short-pulse) interrupts */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
7438 
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw, then acknowledge anything still
 * pending and reset the interrupt enable state (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7454 
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7468 
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw via the suspend path before freeing the ring */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7483 
/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK).  Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* prefer the writeback copy of the wptr when writeback is enabled;
	 * this avoids an MMIO register read in the interrupt path
	 */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* ack the overflow with a read-modify-write of IH_RB_CNTL so
		 * the hw can flag a future overflow again
		 */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	/* mask down to the valid ring range */
	return (wptr & rdev->ih.ptr_mask);
}
7519 
7520 /*        CIK IV Ring
7521  * Each IV ring entry is 128 bits:
7522  * [7:0]    - interrupt source id
7523  * [31:8]   - reserved
7524  * [59:32]  - interrupt source data
7525  * [63:60]  - reserved
7526  * [71:64]  - RINGID
7527  *            CP:
7528  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7529  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7530  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7531  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7532  *            PIPE_ID - ME0 0=3D
7533  *                    - ME1&2 compute dispatcher (4 pipes each)
7534  *            SDMA:
7535  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7536  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7537  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7538  * [79:72]  - VMID
7539  * [95:80]  - PASID
7540  * [127:96] - reserved
7541  */
7542 /**
7543  * cik_irq_process - interrupt handler
7544  *
7545  * @rdev: radeon_device pointer
7546  *
7547  * Interrupt hander (CIK).  Walk the IH ring,
7548  * ack interrupts and schedule work to handle
7549  * interrupt events.
7550  * Returns irq process return code.
7551  */
7552 int cik_irq_process(struct radeon_device *rdev)
7553 {
7554 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7555 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7556 	u32 wptr;
7557 	u32 rptr;
7558 	u32 src_id, src_data, ring_id;
7559 	u8 me_id, pipe_id, queue_id;
7560 	u32 ring_index;
7561 	bool queue_hotplug = false;
7562 	bool queue_dp = false;
7563 	bool queue_reset = false;
7564 	u32 addr, status, mc_client;
7565 	bool queue_thermal = false;
7566 
7567 	if (!rdev->ih.enabled || rdev->shutdown)
7568 		return IRQ_NONE;
7569 
7570 	wptr = cik_get_ih_wptr(rdev);
7571 
7572 restart_ih:
7573 	/* is somebody else already processing irqs? */
7574 	if (atomic_xchg(&rdev->ih.lock, 1))
7575 		return IRQ_NONE;
7576 
7577 	rptr = rdev->ih.rptr;
7578 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7579 
7580 	/* Order reading of wptr vs. reading of IH ring data */
7581 	rmb();
7582 
7583 	/* display interrupts */
7584 	cik_irq_ack(rdev);
7585 
7586 	while (rptr != wptr) {
7587 		/* wptr/rptr are in bytes! */
7588 		ring_index = rptr / 4;
7589 
7590 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7591 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7592 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7593 
7594 		switch (src_id) {
7595 		case 1: /* D1 vblank/vline */
7596 			switch (src_data) {
7597 			case 0: /* D1 vblank */
7598 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7599 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7600 
7601 				if (rdev->irq.crtc_vblank_int[0]) {
7602 					drm_handle_vblank(rdev->ddev, 0);
7603 					rdev->pm.vblank_sync = true;
7604 					wake_up(&rdev->irq.vblank_queue);
7605 				}
7606 				if (atomic_read(&rdev->irq.pflip[0]))
7607 					radeon_crtc_handle_vblank(rdev, 0);
7608 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7609 				DRM_DEBUG("IH: D1 vblank\n");
7610 
7611 				break;
7612 			case 1: /* D1 vline */
7613 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7614 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7615 
7616 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7617 				DRM_DEBUG("IH: D1 vline\n");
7618 
7619 				break;
7620 			default:
7621 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7622 				break;
7623 			}
7624 			break;
7625 		case 2: /* D2 vblank/vline */
7626 			switch (src_data) {
7627 			case 0: /* D2 vblank */
7628 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7629 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7630 
7631 				if (rdev->irq.crtc_vblank_int[1]) {
7632 					drm_handle_vblank(rdev->ddev, 1);
7633 					rdev->pm.vblank_sync = true;
7634 					wake_up(&rdev->irq.vblank_queue);
7635 				}
7636 				if (atomic_read(&rdev->irq.pflip[1]))
7637 					radeon_crtc_handle_vblank(rdev, 1);
7638 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7639 				DRM_DEBUG("IH: D2 vblank\n");
7640 
7641 				break;
7642 			case 1: /* D2 vline */
7643 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7644 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7645 
7646 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7647 				DRM_DEBUG("IH: D2 vline\n");
7648 
7649 				break;
7650 			default:
7651 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7652 				break;
7653 			}
7654 			break;
7655 		case 3: /* D3 vblank/vline */
7656 			switch (src_data) {
7657 			case 0: /* D3 vblank */
7658 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7659 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7660 
7661 				if (rdev->irq.crtc_vblank_int[2]) {
7662 					drm_handle_vblank(rdev->ddev, 2);
7663 					rdev->pm.vblank_sync = true;
7664 					wake_up(&rdev->irq.vblank_queue);
7665 				}
7666 				if (atomic_read(&rdev->irq.pflip[2]))
7667 					radeon_crtc_handle_vblank(rdev, 2);
7668 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7669 				DRM_DEBUG("IH: D3 vblank\n");
7670 
7671 				break;
7672 			case 1: /* D3 vline */
7673 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7674 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7675 
7676 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7677 				DRM_DEBUG("IH: D3 vline\n");
7678 
7679 				break;
7680 			default:
7681 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7682 				break;
7683 			}
7684 			break;
7685 		case 4: /* D4 vblank/vline */
7686 			switch (src_data) {
7687 			case 0: /* D4 vblank */
7688 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7689 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7690 
7691 				if (rdev->irq.crtc_vblank_int[3]) {
7692 					drm_handle_vblank(rdev->ddev, 3);
7693 					rdev->pm.vblank_sync = true;
7694 					wake_up(&rdev->irq.vblank_queue);
7695 				}
7696 				if (atomic_read(&rdev->irq.pflip[3]))
7697 					radeon_crtc_handle_vblank(rdev, 3);
7698 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7699 				DRM_DEBUG("IH: D4 vblank\n");
7700 
7701 				break;
7702 			case 1: /* D4 vline */
7703 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7704 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7705 
7706 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7707 				DRM_DEBUG("IH: D4 vline\n");
7708 
7709 				break;
7710 			default:
7711 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7712 				break;
7713 			}
7714 			break;
7715 		case 5: /* D5 vblank/vline */
7716 			switch (src_data) {
7717 			case 0: /* D5 vblank */
7718 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7719 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7720 
7721 				if (rdev->irq.crtc_vblank_int[4]) {
7722 					drm_handle_vblank(rdev->ddev, 4);
7723 					rdev->pm.vblank_sync = true;
7724 					wake_up(&rdev->irq.vblank_queue);
7725 				}
7726 				if (atomic_read(&rdev->irq.pflip[4]))
7727 					radeon_crtc_handle_vblank(rdev, 4);
7728 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7729 				DRM_DEBUG("IH: D5 vblank\n");
7730 
7731 				break;
7732 			case 1: /* D5 vline */
7733 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7734 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7735 
7736 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7737 				DRM_DEBUG("IH: D5 vline\n");
7738 
7739 				break;
7740 			default:
7741 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7742 				break;
7743 			}
7744 			break;
7745 		case 6: /* D6 vblank/vline */
7746 			switch (src_data) {
7747 			case 0: /* D6 vblank */
7748 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7749 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7750 
7751 				if (rdev->irq.crtc_vblank_int[5]) {
7752 					drm_handle_vblank(rdev->ddev, 5);
7753 					rdev->pm.vblank_sync = true;
7754 					wake_up(&rdev->irq.vblank_queue);
7755 				}
7756 				if (atomic_read(&rdev->irq.pflip[5]))
7757 					radeon_crtc_handle_vblank(rdev, 5);
7758 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7759 				DRM_DEBUG("IH: D6 vblank\n");
7760 
7761 				break;
7762 			case 1: /* D6 vline */
7763 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7764 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7765 
7766 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7767 				DRM_DEBUG("IH: D6 vline\n");
7768 
7769 				break;
7770 			default:
7771 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7772 				break;
7773 			}
7774 			break;
7775 		case 8: /* D1 page flip */
7776 		case 10: /* D2 page flip */
7777 		case 12: /* D3 page flip */
7778 		case 14: /* D4 page flip */
7779 		case 16: /* D5 page flip */
7780 		case 18: /* D6 page flip */
7781 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7782 			if (radeon_use_pflipirq > 0)
7783 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7784 			break;
7785 		case 42: /* HPD hotplug */
7786 			switch (src_data) {
7787 			case 0:
7788 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7789 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7790 
7791 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7792 				queue_hotplug = true;
7793 				DRM_DEBUG("IH: HPD1\n");
7794 
7795 				break;
7796 			case 1:
7797 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7798 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7799 
7800 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7801 				queue_hotplug = true;
7802 				DRM_DEBUG("IH: HPD2\n");
7803 
7804 				break;
7805 			case 2:
7806 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7807 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7808 
7809 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7810 				queue_hotplug = true;
7811 				DRM_DEBUG("IH: HPD3\n");
7812 
7813 				break;
7814 			case 3:
7815 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7816 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7817 
7818 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7819 				queue_hotplug = true;
7820 				DRM_DEBUG("IH: HPD4\n");
7821 
7822 				break;
7823 			case 4:
7824 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7825 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7826 
7827 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7828 				queue_hotplug = true;
7829 				DRM_DEBUG("IH: HPD5\n");
7830 
7831 				break;
7832 			case 5:
7833 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7834 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7835 
7836 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7837 				queue_hotplug = true;
7838 				DRM_DEBUG("IH: HPD6\n");
7839 
7840 				break;
7841 			case 6:
7842 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7843 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7844 
7845 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7846 				queue_dp = true;
7847 				DRM_DEBUG("IH: HPD_RX 1\n");
7848 
7849 				break;
7850 			case 7:
7851 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7852 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7853 
7854 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7855 				queue_dp = true;
7856 				DRM_DEBUG("IH: HPD_RX 2\n");
7857 
7858 				break;
7859 			case 8:
7860 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7861 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7862 
7863 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7864 				queue_dp = true;
7865 				DRM_DEBUG("IH: HPD_RX 3\n");
7866 
7867 				break;
7868 			case 9:
7869 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7870 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7871 
7872 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7873 				queue_dp = true;
7874 				DRM_DEBUG("IH: HPD_RX 4\n");
7875 
7876 				break;
7877 			case 10:
7878 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7879 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7880 
7881 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7882 				queue_dp = true;
7883 				DRM_DEBUG("IH: HPD_RX 5\n");
7884 
7885 				break;
7886 			case 11:
7887 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7888 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7889 
7890 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7891 				queue_dp = true;
7892 				DRM_DEBUG("IH: HPD_RX 6\n");
7893 
7894 				break;
7895 			default:
7896 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7897 				break;
7898 			}
7899 			break;
7900 		case 96:
7901 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7902 			WREG32(SRBM_INT_ACK, 0x1);
7903 			break;
7904 		case 124: /* UVD */
7905 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7906 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7907 			break;
7908 		case 146:
7909 		case 147:
7910 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7911 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7912 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7913 			/* reset addr and status */
7914 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7915 			if (addr == 0x0 && status == 0x0)
7916 				break;
7917 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7918 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7919 				addr);
7920 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7921 				status);
7922 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7923 			break;
7924 		case 167: /* VCE */
7925 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7926 			switch (src_data) {
7927 			case 0:
7928 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7929 				break;
7930 			case 1:
7931 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7932 				break;
7933 			default:
7934 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7935 				break;
7936 			}
7937 			break;
7938 		case 176: /* GFX RB CP_INT */
7939 		case 177: /* GFX IB CP_INT */
7940 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7941 			break;
7942 		case 181: /* CP EOP event */
7943 			DRM_DEBUG("IH: CP EOP\n");
7944 			/* XXX check the bitfield order! */
7945 			me_id = (ring_id & 0x60) >> 5;
7946 			pipe_id = (ring_id & 0x18) >> 3;
7947 			queue_id = (ring_id & 0x7) >> 0;
7948 			switch (me_id) {
7949 			case 0:
7950 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7951 				break;
7952 			case 1:
7953 			case 2:
7954 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7955 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7956 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7957 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7958 				break;
7959 			}
7960 			break;
7961 		case 184: /* CP Privileged reg access */
7962 			DRM_ERROR("Illegal register access in command stream\n");
7963 			/* XXX check the bitfield order! */
7964 			me_id = (ring_id & 0x60) >> 5;
7965 			pipe_id = (ring_id & 0x18) >> 3;
7966 			queue_id = (ring_id & 0x7) >> 0;
7967 			switch (me_id) {
7968 			case 0:
7969 				/* This results in a full GPU reset, but all we need to do is soft
7970 				 * reset the CP for gfx
7971 				 */
7972 				queue_reset = true;
7973 				break;
7974 			case 1:
7975 				/* XXX compute */
7976 				queue_reset = true;
7977 				break;
7978 			case 2:
7979 				/* XXX compute */
7980 				queue_reset = true;
7981 				break;
7982 			}
7983 			break;
7984 		case 185: /* CP Privileged inst */
7985 			DRM_ERROR("Illegal instruction in command stream\n");
7986 			/* XXX check the bitfield order! */
7987 			me_id = (ring_id & 0x60) >> 5;
7988 			pipe_id = (ring_id & 0x18) >> 3;
7989 			queue_id = (ring_id & 0x7) >> 0;
7990 			switch (me_id) {
7991 			case 0:
7992 				/* This results in a full GPU reset, but all we need to do is soft
7993 				 * reset the CP for gfx
7994 				 */
7995 				queue_reset = true;
7996 				break;
7997 			case 1:
7998 				/* XXX compute */
7999 				queue_reset = true;
8000 				break;
8001 			case 2:
8002 				/* XXX compute */
8003 				queue_reset = true;
8004 				break;
8005 			}
8006 			break;
8007 		case 224: /* SDMA trap event */
8008 			/* XXX check the bitfield order! */
8009 			me_id = (ring_id & 0x3) >> 0;
8010 			queue_id = (ring_id & 0xc) >> 2;
8011 			DRM_DEBUG("IH: SDMA trap\n");
8012 			switch (me_id) {
8013 			case 0:
8014 				switch (queue_id) {
8015 				case 0:
8016 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8017 					break;
8018 				case 1:
8019 					/* XXX compute */
8020 					break;
8021 				case 2:
8022 					/* XXX compute */
8023 					break;
8024 				}
8025 				break;
8026 			case 1:
8027 				switch (queue_id) {
8028 				case 0:
8029 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8030 					break;
8031 				case 1:
8032 					/* XXX compute */
8033 					break;
8034 				case 2:
8035 					/* XXX compute */
8036 					break;
8037 				}
8038 				break;
8039 			}
8040 			break;
8041 		case 230: /* thermal low to high */
8042 			DRM_DEBUG("IH: thermal low to high\n");
8043 			rdev->pm.dpm.thermal.high_to_low = false;
8044 			queue_thermal = true;
8045 			break;
8046 		case 231: /* thermal high to low */
8047 			DRM_DEBUG("IH: thermal high to low\n");
8048 			rdev->pm.dpm.thermal.high_to_low = true;
8049 			queue_thermal = true;
8050 			break;
8051 		case 233: /* GUI IDLE */
8052 			DRM_DEBUG("IH: GUI idle\n");
8053 			break;
8054 		case 241: /* SDMA Privileged inst */
8055 		case 247: /* SDMA Privileged inst */
8056 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8057 			/* XXX check the bitfield order! */
8058 			me_id = (ring_id & 0x3) >> 0;
8059 			queue_id = (ring_id & 0xc) >> 2;
8060 			switch (me_id) {
8061 			case 0:
8062 				switch (queue_id) {
8063 				case 0:
8064 					queue_reset = true;
8065 					break;
8066 				case 1:
8067 					/* XXX compute */
8068 					queue_reset = true;
8069 					break;
8070 				case 2:
8071 					/* XXX compute */
8072 					queue_reset = true;
8073 					break;
8074 				}
8075 				break;
8076 			case 1:
8077 				switch (queue_id) {
8078 				case 0:
8079 					queue_reset = true;
8080 					break;
8081 				case 1:
8082 					/* XXX compute */
8083 					queue_reset = true;
8084 					break;
8085 				case 2:
8086 					/* XXX compute */
8087 					queue_reset = true;
8088 					break;
8089 				}
8090 				break;
8091 			}
8092 			break;
8093 		default:
8094 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8095 			break;
8096 		}
8097 
8098 		/* wptr/rptr are in bytes! */
8099 		rptr += 16;
8100 		rptr &= rdev->ih.ptr_mask;
8101 		WREG32(IH_RB_RPTR, rptr);
8102 	}
8103 	if (queue_dp)
8104 		schedule_work(&rdev->dp_work);
8105 	if (queue_hotplug)
8106 		schedule_delayed_work(&rdev->hotplug_work, 0);
8107 	if (queue_reset) {
8108 		rdev->needs_reset = true;
8109 		wake_up_all(&rdev->fence_queue);
8110 	}
8111 	if (queue_thermal)
8112 		schedule_work(&rdev->pm.dpm.thermal.work);
8113 	rdev->ih.rptr = rptr;
8114 	atomic_set(&rdev->ih.lock, 0);
8115 
8116 	/* make sure wptr hasn't changed while processing */
8117 	wptr = cik_get_ih_wptr(rdev);
8118 	if (wptr != rptr)
8119 		goto restart_ih;
8120 
8121 	return IRQ_HANDLED;
8122 }
8123 
8124 /*
8125  * startup/shutdown callbacks
8126  */
/**
 * cik_uvd_init - set up the UVD block
 *
 * @rdev: radeon_device pointer
 *
 * Initialize UVD and its ring (CIK).  On radeon_uvd_init()
 * failure, clear rdev->has_uvd so the later UVD start/resume
 * paths become no-ops.
 */
static void cik_uvd_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
		/*
		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
		 * to early fails cik_uvd_start() and thus nothing happens
		 * there. So it is pointless to try to go through that code
		 * hence why we disable uvd here.
		 */
		rdev->has_uvd = 0;
		return;
	}
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
}
8149 
/**
 * cik_uvd_start - start the UVD block
 *
 * @rdev: radeon_device pointer
 *
 * Resume UVD and start its fence ring (CIK).  On any failure the
 * UVD ring_size is zeroed so cik_uvd_resume() will skip the ring.
 */
static void cik_uvd_start(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
		goto error;
	}
	r = uvd_v4_2_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
		goto error;
	}
	return;

error:
	/* mark the ring unusable; cik_uvd_resume() checks ring_size */
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
}
8177 
/**
 * cik_uvd_resume - bring up the UVD ring
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the UVD ring and the UVD 1.0 block (CIK).
 * Does nothing if UVD is disabled or cik_uvd_start() failed
 * (ring_size == 0).
 */
static void cik_uvd_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
		return;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
		return;
	}
	r = uvd_v1_0_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
		return;
	}
}
8198 
/**
 * cik_vce_init - set up the VCE block
 *
 * @rdev: radeon_device pointer
 *
 * Initialize VCE and its two rings (CIK).  On radeon_vce_init()
 * failure, clear rdev->has_vce so the later VCE start/resume
 * paths become no-ops.
 */
static void cik_vce_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_vce)
		return;

	r = radeon_vce_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
		/*
		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
		 * to early fails cik_vce_start() and thus nothing happens
		 * there. So it is pointless to try to go through that code
		 * hence why we disable vce here.
		 */
		rdev->has_vce = 0;
		return;
	}
	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
}
8223 
8224 static void cik_vce_start(struct radeon_device *rdev)
8225 {
8226 	int r;
8227 
8228 	if (!rdev->has_vce)
8229 		return;
8230 
8231 	r = radeon_vce_resume(rdev);
8232 	if (r) {
8233 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8234 		goto error;
8235 	}
8236 	r = vce_v2_0_resume(rdev);
8237 	if (r) {
8238 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8239 		goto error;
8240 	}
8241 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8242 	if (r) {
8243 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8244 		goto error;
8245 	}
8246 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8247 	if (r) {
8248 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8249 		goto error;
8250 	}
8251 	return;
8252 
8253 error:
8254 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8255 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8256 }
8257 
8258 static void cik_vce_resume(struct radeon_device *rdev)
8259 {
8260 	struct radeon_ring *ring;
8261 	int r;
8262 
8263 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8264 		return;
8265 
8266 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8267 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8268 	if (r) {
8269 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8270 		return;
8271 	}
8272 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8273 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8274 	if (r) {
8275 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8276 		return;
8277 	}
8278 	r = vce_v1_0_init(rdev);
8279 	if (r) {
8280 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8281 		return;
8282 	}
8283 }
8284 
8285 /**
8286  * cik_startup - program the asic to a functional state
8287  *
8288  * @rdev: radeon_device pointer
8289  *
8290  * Programs the asic to a functional state (CIK).
8291  * Called by cik_init() and cik_resume().
8292  * Returns 0 for success, error for failure.
8293  */
8294 static int cik_startup(struct radeon_device *rdev)
8295 {
8296 	struct radeon_ring *ring;
8297 	u32 nop;
8298 	int r;
8299 
8300 	/* enable pcie gen2/3 link */
8301 	cik_pcie_gen3_enable(rdev);
8302 	/* enable aspm */
8303 	cik_program_aspm(rdev);
8304 
8305 	/* scratch needs to be initialized before MC */
8306 	r = r600_vram_scratch_init(rdev);
8307 	if (r)
8308 		return r;
8309 
8310 	cik_mc_program(rdev);
8311 
8312 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8313 		r = ci_mc_load_microcode(rdev);
8314 		if (r) {
8315 			DRM_ERROR("Failed to load MC firmware!\n");
8316 			return r;
8317 		}
8318 	}
8319 
8320 	r = cik_pcie_gart_enable(rdev);
8321 	if (r)
8322 		return r;
8323 	cik_gpu_init(rdev);
8324 
8325 	/* allocate rlc buffers */
8326 	if (rdev->flags & RADEON_IS_IGP) {
8327 		if (rdev->family == CHIP_KAVERI) {
8328 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8329 			rdev->rlc.reg_list_size =
8330 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8331 		} else {
8332 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8333 			rdev->rlc.reg_list_size =
8334 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8335 		}
8336 	}
8337 	rdev->rlc.cs_data = ci_cs_data;
8338 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8339 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8340 	r = sumo_rlc_init(rdev);
8341 	if (r) {
8342 		DRM_ERROR("Failed to init rlc BOs!\n");
8343 		return r;
8344 	}
8345 
8346 	/* allocate wb buffer */
8347 	r = radeon_wb_init(rdev);
8348 	if (r)
8349 		return r;
8350 
8351 	/* allocate mec buffers */
8352 	r = cik_mec_init(rdev);
8353 	if (r) {
8354 		DRM_ERROR("Failed to init MEC BOs!\n");
8355 		return r;
8356 	}
8357 
8358 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8359 	if (r) {
8360 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8361 		return r;
8362 	}
8363 
8364 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8365 	if (r) {
8366 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8367 		return r;
8368 	}
8369 
8370 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8371 	if (r) {
8372 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8373 		return r;
8374 	}
8375 
8376 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8377 	if (r) {
8378 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8379 		return r;
8380 	}
8381 
8382 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8383 	if (r) {
8384 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8385 		return r;
8386 	}
8387 
8388 	cik_uvd_start(rdev);
8389 	cik_vce_start(rdev);
8390 
8391 	/* Enable IRQ */
8392 	if (!rdev->irq.installed) {
8393 		r = radeon_irq_kms_init(rdev);
8394 		if (r)
8395 			return r;
8396 	}
8397 
8398 	r = cik_irq_init(rdev);
8399 	if (r) {
8400 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8401 		radeon_irq_kms_fini(rdev);
8402 		return r;
8403 	}
8404 	cik_irq_set(rdev);
8405 
8406 	if (rdev->family == CHIP_HAWAII) {
8407 		if (rdev->new_fw)
8408 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8409 		else
8410 			nop = RADEON_CP_PACKET2;
8411 	} else {
8412 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8413 	}
8414 
8415 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8416 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8417 			     nop);
8418 	if (r)
8419 		return r;
8420 
8421 	/* set up the compute queues */
8422 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8423 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8424 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8425 			     nop);
8426 	if (r)
8427 		return r;
8428 	ring->me = 1; /* first MEC */
8429 	ring->pipe = 0; /* first pipe */
8430 	ring->queue = 0; /* first queue */
8431 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8432 
8433 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8434 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8435 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8436 			     nop);
8437 	if (r)
8438 		return r;
8439 	/* dGPU only have 1 MEC */
8440 	ring->me = 1; /* first MEC */
8441 	ring->pipe = 0; /* first pipe */
8442 	ring->queue = 1; /* second queue */
8443 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8444 
8445 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8446 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8447 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8448 	if (r)
8449 		return r;
8450 
8451 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8452 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8453 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8454 	if (r)
8455 		return r;
8456 
8457 	r = cik_cp_resume(rdev);
8458 	if (r)
8459 		return r;
8460 
8461 	r = cik_sdma_resume(rdev);
8462 	if (r)
8463 		return r;
8464 
8465 	cik_uvd_resume(rdev);
8466 	cik_vce_resume(rdev);
8467 
8468 	r = radeon_ib_pool_init(rdev);
8469 	if (r) {
8470 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8471 		return r;
8472 	}
8473 
8474 	r = radeon_vm_manager_init(rdev);
8475 	if (r) {
8476 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8477 		return r;
8478 	}
8479 
8480 	r = radeon_audio_init(rdev);
8481 	if (r)
8482 		return r;
8483 
8484 	return 0;
8485 }
8486 
8487 /**
8488  * cik_resume - resume the asic to a functional state
8489  *
8490  * @rdev: radeon_device pointer
8491  *
8492  * Programs the asic to a functional state (CIK).
8493  * Called at resume.
8494  * Returns 0 for success, error for failure.
8495  */
8496 int cik_resume(struct radeon_device *rdev)
8497 {
8498 	int r;
8499 
8500 	/* post card */
8501 	atom_asic_init(rdev->mode_info.atom_context);
8502 
8503 	/* init golden registers */
8504 	cik_init_golden_registers(rdev);
8505 
8506 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8507 		radeon_pm_resume(rdev);
8508 
8509 	rdev->accel_working = true;
8510 	r = cik_startup(rdev);
8511 	if (r) {
8512 		DRM_ERROR("cik startup failed on resume\n");
8513 		rdev->accel_working = false;
8514 		return r;
8515 	}
8516 
8517 	return r;
8518 
8519 }
8520 
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK):
 * stop PM/audio/VM services, halt the CP and SDMA engines,
 * quiesce UVD/VCE, tear down gating, then disable interrupts,
 * writeback and the GART.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* tear down powergating and clockgating state */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	/* disable writeback and the GART once the engines are idle */
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8550 
8551 /* Plan is to move initialization in that function and use
8552  * helper function so that radeon_device_init pretty much
8553  * do nothing more than calling asic specific function. This
8554  * should also allow to remove a bunch of callback function
8555  * like vram_info.
8556  */
8557 /**
8558  * cik_init - asic specific driver and hw init
8559  *
8560  * @rdev: radeon_device pointer
8561  *
8562  * Setup asic specific driver variables and program the hw
8563  * to a functional state (CIK).
8564  * Called at driver startup.
8565  * Returns 0 for success, errors for failure.
8566  */
8567 int cik_init(struct radeon_device *rdev)
8568 {
8569 	struct radeon_ring *ring;
8570 	int r;
8571 
8572 	/* Read BIOS */
8573 	if (!radeon_get_bios(rdev)) {
8574 		if (ASIC_IS_AVIVO(rdev))
8575 			return -EINVAL;
8576 	}
8577 	/* Must be an ATOMBIOS */
8578 	if (!rdev->is_atom_bios) {
8579 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8580 		return -EINVAL;
8581 	}
8582 	r = radeon_atombios_init(rdev);
8583 	if (r)
8584 		return r;
8585 
8586 	/* Post card if necessary */
8587 	if (!radeon_card_posted(rdev)) {
8588 		if (!rdev->bios) {
8589 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8590 			return -EINVAL;
8591 		}
8592 		DRM_INFO("GPU not posted. posting now...\n");
8593 		atom_asic_init(rdev->mode_info.atom_context);
8594 	}
8595 	/* init golden registers */
8596 	cik_init_golden_registers(rdev);
8597 	/* Initialize scratch registers */
8598 	cik_scratch_init(rdev);
8599 	/* Initialize surface registers */
8600 	radeon_surface_init(rdev);
8601 	/* Initialize clocks */
8602 	radeon_get_clock_info(rdev->ddev);
8603 
8604 	/* Fence driver */
8605 	r = radeon_fence_driver_init(rdev);
8606 	if (r)
8607 		return r;
8608 
8609 	/* initialize memory controller */
8610 	r = cik_mc_init(rdev);
8611 	if (r)
8612 		return r;
8613 	/* Memory manager */
8614 	r = radeon_bo_init(rdev);
8615 	if (r)
8616 		return r;
8617 
8618 	if (rdev->flags & RADEON_IS_IGP) {
8619 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8620 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8621 			r = cik_init_microcode(rdev);
8622 			if (r) {
8623 				DRM_ERROR("Failed to load firmware!\n");
8624 				return r;
8625 			}
8626 		}
8627 	} else {
8628 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8629 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8630 		    !rdev->mc_fw) {
8631 			r = cik_init_microcode(rdev);
8632 			if (r) {
8633 				DRM_ERROR("Failed to load firmware!\n");
8634 				return r;
8635 			}
8636 		}
8637 	}
8638 
8639 	/* Initialize power management */
8640 	radeon_pm_init(rdev);
8641 
8642 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8643 	ring->ring_obj = NULL;
8644 	r600_ring_init(rdev, ring, 1024 * 1024);
8645 
8646 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8647 	ring->ring_obj = NULL;
8648 	r600_ring_init(rdev, ring, 1024 * 1024);
8649 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8650 	if (r)
8651 		return r;
8652 
8653 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8654 	ring->ring_obj = NULL;
8655 	r600_ring_init(rdev, ring, 1024 * 1024);
8656 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8657 	if (r)
8658 		return r;
8659 
8660 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8661 	ring->ring_obj = NULL;
8662 	r600_ring_init(rdev, ring, 256 * 1024);
8663 
8664 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8665 	ring->ring_obj = NULL;
8666 	r600_ring_init(rdev, ring, 256 * 1024);
8667 
8668 	cik_uvd_init(rdev);
8669 	cik_vce_init(rdev);
8670 
8671 	rdev->ih.ring_obj = NULL;
8672 	r600_ih_ring_init(rdev, 64 * 1024);
8673 
8674 	r = r600_pcie_gart_init(rdev);
8675 	if (r)
8676 		return r;
8677 
8678 	rdev->accel_working = true;
8679 	r = cik_startup(rdev);
8680 	if (r) {
8681 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8682 		cik_cp_fini(rdev);
8683 		cik_sdma_fini(rdev);
8684 		cik_irq_fini(rdev);
8685 		sumo_rlc_fini(rdev);
8686 		cik_mec_fini(rdev);
8687 		radeon_wb_fini(rdev);
8688 		radeon_ib_pool_fini(rdev);
8689 		radeon_vm_manager_fini(rdev);
8690 		radeon_irq_kms_fini(rdev);
8691 		cik_pcie_gart_fini(rdev);
8692 		rdev->accel_working = false;
8693 	}
8694 
8695 	/* Don't start up if the MC ucode is missing.
8696 	 * The default clocks and voltages before the MC ucode
8697 	 * is loaded are not suffient for advanced operations.
8698 	 */
8699 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8700 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8701 		return -EINVAL;
8702 	}
8703 
8704 	return 0;
8705 }
8706 
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).  Mirrors the allocations made in cik_init()
 * and cik_startup().
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* stop the engines before freeing their state */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* release the BIOS copy taken at init time */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8742 
8743 void dce8_program_fmt(struct drm_encoder *encoder)
8744 {
8745 	struct drm_device *dev = encoder->dev;
8746 	struct radeon_device *rdev = dev->dev_private;
8747 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8748 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8749 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8750 	int bpc = 0;
8751 	u32 tmp = 0;
8752 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8753 
8754 	if (connector) {
8755 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8756 		bpc = radeon_get_monitor_bpc(connector);
8757 		dither = radeon_connector->dither;
8758 	}
8759 
8760 	/* LVDS/eDP FMT is set up by atom */
8761 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8762 		return;
8763 
8764 	/* not needed for analog */
8765 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8766 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8767 		return;
8768 
8769 	if (bpc == 0)
8770 		return;
8771 
8772 	switch (bpc) {
8773 	case 6:
8774 		if (dither == RADEON_FMT_DITHER_ENABLE)
8775 			/* XXX sort out optimal dither settings */
8776 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8777 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8778 		else
8779 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8780 		break;
8781 	case 8:
8782 		if (dither == RADEON_FMT_DITHER_ENABLE)
8783 			/* XXX sort out optimal dither settings */
8784 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8785 				FMT_RGB_RANDOM_ENABLE |
8786 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8787 		else
8788 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8789 		break;
8790 	case 10:
8791 		if (dither == RADEON_FMT_DITHER_ENABLE)
8792 			/* XXX sort out optimal dither settings */
8793 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8794 				FMT_RGB_RANDOM_ENABLE |
8795 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8796 		else
8797 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8798 		break;
8799 	default:
8800 		/* not needed */
8801 		break;
8802 	}
8803 
8804 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8805 }
8806 
/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation and the DMIF buffer
 * allocation for the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controllers.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need use to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		/* tmp selects the LB memory config, buffer_alloc the DMIF buffers */
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		/* crtc disabled: minimal config, no DMIF buffers */
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* poll until the hw reports the DMIF allocation completed,
	 * bounded by usec_timeout */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		/* translate the chosen config back into the LB size in pixels */
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
8881 
8882 /**
8883  * cik_get_number_of_dram_channels - get the number of dram channels
8884  *
8885  * @rdev: radeon_device pointer
8886  *
8887  * Look up the number of video ram channels (CIK).
8888  * Used for display watermark bandwidth calculations
8889  * Returns the number of dram channels
8890  */
8891 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8892 {
8893 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8894 
8895 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8896 	case 0:
8897 	default:
8898 		return 1;
8899 	case 1:
8900 		return 2;
8901 	case 2:
8902 		return 4;
8903 	case 3:
8904 		return 8;
8905 	case 4:
8906 		return 3;
8907 	case 5:
8908 		return 6;
8909 	case 6:
8910 		return 10;
8911 	case 7:
8912 		return 12;
8913 	case 8:
8914 		return 16;
8915 	}
8916 }
8917 
/* dce8_wm_params - inputs to the DCE8 display watermark calculations */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8933 
8934 /**
8935  * dce8_dram_bandwidth - get the dram bandwidth
8936  *
8937  * @wm: watermark calculation data
8938  *
8939  * Calculate the raw dram bandwidth (CIK).
8940  * Used for display watermark bandwidth calculations
8941  * Returns the dram bandwidth in MBytes/s
8942  */
8943 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8944 {
8945 	/* Calculate raw DRAM Bandwidth */
8946 	fixed20_12 dram_efficiency; /* 0.7 */
8947 	fixed20_12 yclk, dram_channels, bandwidth;
8948 	fixed20_12 a;
8949 
8950 	a.full = dfixed_const(1000);
8951 	yclk.full = dfixed_const(wm->yclk);
8952 	yclk.full = dfixed_div(yclk, a);
8953 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8954 	a.full = dfixed_const(10);
8955 	dram_efficiency.full = dfixed_const(7);
8956 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8957 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8958 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8959 
8960 	return dfixed_trunc(bandwidth);
8961 }
8962 
8963 /**
8964  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8965  *
8966  * @wm: watermark calculation data
8967  *
8968  * Calculate the dram bandwidth used for display (CIK).
8969  * Used for display watermark bandwidth calculations
8970  * Returns the dram bandwidth for display in MBytes/s
8971  */
8972 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8973 {
8974 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8975 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8976 	fixed20_12 yclk, dram_channels, bandwidth;
8977 	fixed20_12 a;
8978 
8979 	a.full = dfixed_const(1000);
8980 	yclk.full = dfixed_const(wm->yclk);
8981 	yclk.full = dfixed_div(yclk, a);
8982 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8983 	a.full = dfixed_const(10);
8984 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8985 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8986 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8987 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8988 
8989 	return dfixed_trunc(bandwidth);
8990 }
8991 
8992 /**
8993  * dce8_data_return_bandwidth - get the data return bandwidth
8994  *
8995  * @wm: watermark calculation data
8996  *
8997  * Calculate the data return bandwidth used for display (CIK).
8998  * Used for display watermark bandwidth calculations
8999  * Returns the data return bandwidth in MBytes/s
9000  */
9001 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9002 {
9003 	/* Calculate the display Data return Bandwidth */
9004 	fixed20_12 return_efficiency; /* 0.8 */
9005 	fixed20_12 sclk, bandwidth;
9006 	fixed20_12 a;
9007 
9008 	a.full = dfixed_const(1000);
9009 	sclk.full = dfixed_const(wm->sclk);
9010 	sclk.full = dfixed_div(sclk, a);
9011 	a.full = dfixed_const(10);
9012 	return_efficiency.full = dfixed_const(8);
9013 	return_efficiency.full = dfixed_div(return_efficiency, a);
9014 	a.full = dfixed_const(32);
9015 	bandwidth.full = dfixed_mul(a, sclk);
9016 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9017 
9018 	return dfixed_trunc(bandwidth);
9019 }
9020 
9021 /**
9022  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9023  *
9024  * @wm: watermark calculation data
9025  *
9026  * Calculate the dmif bandwidth used for display (CIK).
9027  * Used for display watermark bandwidth calculations
9028  * Returns the dmif bandwidth in MBytes/s
9029  */
9030 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9031 {
9032 	/* Calculate the DMIF Request Bandwidth */
9033 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9034 	fixed20_12 disp_clk, bandwidth;
9035 	fixed20_12 a, b;
9036 
9037 	a.full = dfixed_const(1000);
9038 	disp_clk.full = dfixed_const(wm->disp_clk);
9039 	disp_clk.full = dfixed_div(disp_clk, a);
9040 	a.full = dfixed_const(32);
9041 	b.full = dfixed_mul(a, disp_clk);
9042 
9043 	a.full = dfixed_const(10);
9044 	disp_clk_request_efficiency.full = dfixed_const(8);
9045 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9046 
9047 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9048 
9049 	return dfixed_trunc(bandwidth);
9050 }
9051 
9052 /**
9053  * dce8_available_bandwidth - get the min available bandwidth
9054  *
9055  * @wm: watermark calculation data
9056  *
9057  * Calculate the min available bandwidth used for display (CIK).
9058  * Used for display watermark bandwidth calculations
9059  * Returns the min available bandwidth in MBytes/s
9060  */
9061 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9062 {
9063 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9064 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9065 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9066 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9067 
9068 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9069 }
9070 
9071 /**
9072  * dce8_average_bandwidth - get the average available bandwidth
9073  *
9074  * @wm: watermark calculation data
9075  *
9076  * Calculate the average available bandwidth used for display (CIK).
9077  * Used for display watermark bandwidth calculations
9078  * Returns the average available bandwidth in MBytes/s
9079  */
9080 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9081 {
9082 	/* Calculate the display mode Average Bandwidth
9083 	 * DisplayMode should contain the source and destination dimensions,
9084 	 * timing, etc.
9085 	 */
9086 	fixed20_12 bpp;
9087 	fixed20_12 line_time;
9088 	fixed20_12 src_width;
9089 	fixed20_12 bandwidth;
9090 	fixed20_12 a;
9091 
9092 	a.full = dfixed_const(1000);
9093 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9094 	line_time.full = dfixed_div(line_time, a);
9095 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9096 	src_width.full = dfixed_const(wm->src_width);
9097 	bandwidth.full = dfixed_mul(src_width, bpp);
9098 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9099 	bandwidth.full = dfixed_div(bandwidth, line_time);
9100 
9101 	return dfixed_trunc(bandwidth);
9102 }
9103 
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK): the worst-case time for
 * the display to get its data, combining memory latency, latency
 * from other heads, dc pipe latency and line buffer fill time.
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to wait for */
	if (wm->num_heads == 0)
		return 0;

	/* downscaling/many taps/interlace pull more source lines per output line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* lb fill rate is bounded by per-head bandwidth, dmif throughput
	 * and the display pipe itself */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if a line cannot be filled within the active time, the extra
	 * fill time adds to the latency */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9162 
9163 /**
9164  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9165  * average and available dram bandwidth
9166  *
9167  * @wm: watermark calculation data
9168  *
9169  * Check if the display average bandwidth fits in the display
9170  * dram bandwidth (CIK).
9171  * Used for display watermark bandwidth calculations
9172  * Returns true if the display fits, false if not.
9173  */
9174 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9175 {
9176 	if (dce8_average_bandwidth(wm) <=
9177 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9178 		return true;
9179 	else
9180 		return false;
9181 }
9182 
9183 /**
9184  * dce8_average_bandwidth_vs_available_bandwidth - check
9185  * average and available bandwidth
9186  *
9187  * @wm: watermark calculation data
9188  *
9189  * Check if the display average bandwidth fits in the display
9190  * available bandwidth (CIK).
9191  * Used for display watermark bandwidth calculations
9192  * Returns true if the display fits, false if not.
9193  */
9194 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9195 {
9196 	if (dce8_average_bandwidth(wm) <=
9197 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9198 		return true;
9199 	else
9200 		return false;
9201 }
9202 
9203 /**
9204  * dce8_check_latency_hiding - check latency hiding
9205  *
9206  * @wm: watermark calculation data
9207  *
9208  * Check latency hiding (CIK).
9209  * Used for display watermark bandwidth calculations
9210  * Returns true if the display fits, false if not.
9211  */
9212 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9213 {
9214 	u32 lb_partitions = wm->lb_size / wm->src_width;
9215 	u32 line_time = wm->active_time + wm->blank_time;
9216 	u32 latency_tolerant_lines;
9217 	u32 latency_hiding;
9218 	fixed20_12 a;
9219 
9220 	a.full = dfixed_const(1);
9221 	if (wm->vsc.full > a.full)
9222 		latency_tolerant_lines = 1;
9223 	else {
9224 		if (lb_partitions <= (wm->vtaps + 1))
9225 			latency_tolerant_lines = 1;
9226 		else
9227 			latency_tolerant_lines = 2;
9228 	}
9229 
9230 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9231 
9232 	if (dce8_latency_watermark(wm) <= latency_hiding)
9233 		return true;
9234 	else
9235 		return false;
9236 }
9237 
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).  Watermark A covers the
 * high-clock case, watermark B the low-clock case.
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 active_time;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* timings in ns; mode->clock is in kHz */
		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
					    (u32)mode->clock);
		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
					  (u32)mode->clock);
		line_time = min(line_time, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = active_time;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = active_time;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9376 
9377 /**
9378  * dce8_bandwidth_update - program display watermarks
9379  *
9380  * @rdev: radeon_device pointer
9381  *
9382  * Calculate and program the display watermarks and line
9383  * buffer allocation (CIK).
9384  */
9385 void dce8_bandwidth_update(struct radeon_device *rdev)
9386 {
9387 	struct drm_display_mode *mode = NULL;
9388 	u32 num_heads = 0, lb_size;
9389 	int i;
9390 
9391 	if (!rdev->mode_info.mode_config_initialized)
9392 		return;
9393 
9394 	radeon_update_display_priority(rdev);
9395 
9396 	for (i = 0; i < rdev->num_crtc; i++) {
9397 		if (rdev->mode_info.crtcs[i]->base.enabled)
9398 			num_heads++;
9399 	}
9400 	for (i = 0; i < rdev->num_crtc; i++) {
9401 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9402 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9403 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9404 	}
9405 }
9406 
9407 /**
9408  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9409  *
9410  * @rdev: radeon_device pointer
9411  *
9412  * Fetches a GPU clock counter snapshot (SI).
9413  * Returns the 64 bit clock counter snapshot.
9414  */
9415 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9416 {
9417 	uint64_t clock;
9418 
9419 	mutex_lock(&rdev->gpu_clock_mutex);
9420 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9421 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9422 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9423 	mutex_unlock(&rdev->gpu_clock_mutex);
9424 	return clock;
9425 }
9426 
9427 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9428 			     u32 cntl_reg, u32 status_reg)
9429 {
9430 	int r, i;
9431 	struct atom_clock_dividers dividers;
9432 	uint32_t tmp;
9433 
9434 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9435 					   clock, false, &dividers);
9436 	if (r)
9437 		return r;
9438 
9439 	tmp = RREG32_SMC(cntl_reg);
9440 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9441 	tmp |= dividers.post_divider;
9442 	WREG32_SMC(cntl_reg, tmp);
9443 
9444 	for (i = 0; i < 100; i++) {
9445 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9446 			break;
9447 		mdelay(10);
9448 	}
9449 	if (i == 100)
9450 		return -ETIMEDOUT;
9451 
9452 	return 0;
9453 }
9454 
9455 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9456 {
9457 	int r = 0;
9458 
9459 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9460 	if (r)
9461 		return r;
9462 
9463 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9464 	return r;
9465 }
9466 
9467 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9468 {
9469 	int r, i;
9470 	struct atom_clock_dividers dividers;
9471 	u32 tmp;
9472 
9473 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9474 					   ecclk, false, &dividers);
9475 	if (r)
9476 		return r;
9477 
9478 	for (i = 0; i < 100; i++) {
9479 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9480 			break;
9481 		mdelay(10);
9482 	}
9483 	if (i == 100)
9484 		return -ETIMEDOUT;
9485 
9486 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9487 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9488 	tmp |= dividers.post_divider;
9489 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9490 
9491 	for (i = 0; i < 100; i++) {
9492 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9493 			break;
9494 		mdelay(10);
9495 	}
9496 	if (i == 100)
9497 		return -ETIMEDOUT;
9498 
9499 	return 0;
9500 }
9501 
/**
 * cik_pcie_gen3_enable - attempt to raise the PCIE link speed
 *
 * @rdev: radeon_device pointer
 *
 * If the platform advertises gen2/gen3 support and the link is not
 * already running at the highest supported rate, retrain the link
 * (performing the gen3 equalization retry sequence when targeting
 * 8.0 GT/s) and program the new target link speed.  Does nothing for
 * IGPs, non-PCIE parts, devices on a root bus, or when disabled with
 * radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* which speeds beyond gen1 does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* bail out early if the link already runs at the target rate
	 * (current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3)
	 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends of the link need a PCIe capability to retrain */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL on both ends so the HAWD bit can be restored later */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to the maximum detected width if possible */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* gen3 equalization retry loop: quiesce, redo EQ, then
			 * restore the saved LNKCTL/LNKCTL2 fields on both ends
			 */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 — (1 << 4) is Enter Compliance, (7 << 9) is
				 * the Transmit Margin field of PCI_EXP_LNKCTL2
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the Target Link Speed field (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the HW to clear the bit */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9661 
/**
 * cik_program_aspm - configure PCIE ASPM (L0s/L1) power savings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIE link controller for L0s/L1 entry, PLL power-down
 * in L1 and (when the root port supports clock PM) CLKREQ-based clock
 * switching.  Does nothing for IGPs, non-PCIE parts, or when disabled
 * with radeon.aspm=0.  The disable_* locals are compile-time knobs;
 * all features are currently allowed.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value (fast training sequence count) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* set up L0s/L1 inactivity timers; LC_PMI_TO_L1_DIS is cleared
	 * again below if L1 stays enabled
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF PLLs to power down in the OFF/TXS2 states */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ is only usable if the root port supports clock PM */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the misc clocks off the refclk so it can gate */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just commit the L0s/PMI settings from above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	/* if the N_FTS field reads back saturated and the link is reversed
	 * in both directions, turn the L0s inactivity timer back off
	 */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9810