/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 on success or -EINVAL for an invalid register.
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

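/*
 * Illustrative sketch only, not part of the driver: the expected calling
 * convention for the whitelist above.  The real consumer is assumed to be
 * the RADEON_INFO_READ_REG path in radeon_kms.c.
 *
 *	u32 status;
 *
 *	if (cik_get_allowed_info_register(rdev, GRBM_STATUS, &status) == 0)
 *		DRM_INFO("GRBM_STATUS = 0x%08x\n", status);
 */
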
/*
 * Indirect register accessors
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}

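/*
 * Illustrative sketch only: the usual read-modify-write idiom on top of
 * the DIDT accessors above.  The register and bit names are assumptions
 * taken from cikd.h; ci_dpm.c does the same thing through the
 * RREG32_DIDT/WREG32_DIDT wrappers.
 *
 *	u32 data = cik_didt_rreg(rdev, DIDT_SQ_CTRL0);
 *
 *	data &= ~DIDT_CTRL_EN;
 *	if (enable)
 *		data |= DIDT_CTRL_EN;
 *	cik_didt_wreg(rdev, DIDT_SQ_CTRL0, data);
 */
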
/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	/* the temperature occupies the low 9 bits; if the bit above the
	 * field (0x200) is set, the reading is clamped to 255 degrees C.
	 */
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	/* the raw count is in 1/8 degree C units with a -49 degrees C
	 * offset; a raw reading of 0 is reported as 0 degrees C.
	 */
	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

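/*
 * Illustrative note only: both helpers report millidegrees Celsius, the
 * unit the hwmon sysfs interface expects, so the value can be exposed
 * directly.  The plumbing sketched here is an assumption; the real path
 * goes through the ASIC callback table set up in radeon_asic.c.
 *
 *	int temp = ci_get_temp(rdev);	(e.g. 67000 == 67.000 degrees C)
 */
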
/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* posting read to flush the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* posting read to flush the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* posting read to flush the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

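/*
 * Format note (partly an assumption; the lists are handed to the RLC via
 * sumo_rlc_init()): entries come in pairs of an encoded register
 * descriptor and a default value.  The descriptor packs an
 * instance-select field in the upper 16 bits and the register's dword
 * offset in the lower 16 bits; the bare 0x3/0x5 words appear to be
 * section markers between sub-lists.  Decoding the first spectre entry:
 *
 *	u32 entry = spectre_rlc_save_restore_register_list[0];
 *	u32 byte_offset = (entry & 0xffff) << 2;	(== 0xc12c)
 *	u32 select = entry >> 16;			(== 0x0e00)
 */
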
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

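/*
 * The golden register tables above are triples of
 * { register offset, and_mask, or_value }.  A simplified sketch of how
 * radeon_program_register_sequence() applies them (based on
 * radeon_device.c; treat the details here as an assumption, not a spec):
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		if (and_mask == 0xffffffff) {
 *			tmp = or_mask;
 *		} else {
 *			tmp = RREG32(reg);
 *			tmp &= ~and_mask;
 *			tmp |= (or_mask & and_mask);
 *		}
 *		WREG32(reg, tmp);
 *	}
 */
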
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

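/*
 * Illustrative sketch only: ring code kicks the hardware by writing the
 * ring's write pointer through the doorbell aperture, normally via the
 * WDOORBELL32() wrapper from radeon.h (the exact call site named here is
 * an assumption; compare cik_compute_ring_set_wptr() later in this file):
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
 */
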
1765 #define BONAIRE_IO_MC_REGS_SIZE 36
1766 
1767 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1768 {
1769 	{0x00000070, 0x04400000},
1770 	{0x00000071, 0x80c01803},
1771 	{0x00000072, 0x00004004},
1772 	{0x00000073, 0x00000100},
1773 	{0x00000074, 0x00ff0000},
1774 	{0x00000075, 0x34000000},
1775 	{0x00000076, 0x08000014},
1776 	{0x00000077, 0x00cc08ec},
1777 	{0x00000078, 0x00000400},
1778 	{0x00000079, 0x00000000},
1779 	{0x0000007a, 0x04090000},
1780 	{0x0000007c, 0x00000000},
1781 	{0x0000007e, 0x4408a8e8},
1782 	{0x0000007f, 0x00000304},
1783 	{0x00000080, 0x00000000},
1784 	{0x00000082, 0x00000001},
1785 	{0x00000083, 0x00000002},
1786 	{0x00000084, 0xf3e4f400},
1787 	{0x00000085, 0x052024e3},
1788 	{0x00000087, 0x00000000},
1789 	{0x00000088, 0x01000000},
1790 	{0x0000008a, 0x1c0a0000},
1791 	{0x0000008b, 0xff010000},
1792 	{0x0000008d, 0xffffefff},
1793 	{0x0000008e, 0xfff3efff},
1794 	{0x0000008f, 0xfff3efbf},
1795 	{0x00000092, 0xf7ffffff},
1796 	{0x00000093, 0xffffff7f},
1797 	{0x00000095, 0x00101101},
1798 	{0x00000096, 0x00000fff},
1799 	{0x00000097, 0x00116fff},
1800 	{0x00000098, 0x60010000},
1801 	{0x00000099, 0x10010000},
1802 	{0x0000009a, 0x00006000},
1803 	{0x0000009b, 0x00001000},
1804 	{0x0000009f, 0x00b48000}
1805 };
1806 
1807 #define HAWAII_IO_MC_REGS_SIZE 22
1808 
1809 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1810 {
1811 	{0x0000007d, 0x40000000},
1812 	{0x0000007e, 0x40180304},
1813 	{0x0000007f, 0x0000ff00},
1814 	{0x00000081, 0x00000000},
1815 	{0x00000083, 0x00000800},
1816 	{0x00000086, 0x00000000},
1817 	{0x00000087, 0x00000100},
1818 	{0x00000088, 0x00020100},
1819 	{0x00000089, 0x00000000},
1820 	{0x0000008b, 0x00040000},
1821 	{0x0000008c, 0x00000100},
1822 	{0x0000008e, 0xff010000},
1823 	{0x00000090, 0xffffefff},
1824 	{0x00000091, 0xfff3efff},
1825 	{0x00000092, 0xfff3efbf},
1826 	{0x00000093, 0xf7ffffff},
1827 	{0x00000094, 0xffffff7f},
1828 	{0x00000095, 0x00000fff},
1829 	{0x00000096, 0x00116fff},
1830 	{0x00000097, 0x60010000},
1831 	{0x00000098, 0x10010000},
1832 	{0x0000009f, 0x00c79000}
1833 };
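
/*
 * Each row in the tables above is an {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pair; ci_mc_load_microcode() below writes the
 * index register and then the data register for every entry before
 * streaming in the MC ucode itself.
 */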
1834 
1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
1845  * Switches the currently active register instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853 			     MEID(me & 0x3) |
1854 			     VMID(vmid & 0xf) |
1855 			     QUEUEID(queue & 0x7));
1856 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
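
/*
 * Usage sketch (hedged): callers bracket accesses to instanced
 * registers with a select/deselect pair, normally while holding
 * rdev->srbm_mutex, roughly:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the per-queue (HQD) registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */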
1858 
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
1868 int ci_mc_load_microcode(struct radeon_device *rdev)
1869 {
1870 	const __be32 *fw_data = NULL;
1871 	const __le32 *new_fw_data = NULL;
1872 	u32 running, tmp;
1873 	u32 *io_mc_regs = NULL;
1874 	const __le32 *new_io_mc_regs = NULL;
1875 	int i, regs_size, ucode_size;
1876 
1877 	if (!rdev->mc_fw)
1878 		return -EINVAL;
1879 
1880 	if (rdev->new_fw) {
1881 		const struct mc_firmware_header_v1_0 *hdr =
1882 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883 
1884 		radeon_ucode_print_mc_hdr(&hdr->header);
1885 
1886 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887 		new_io_mc_regs = (const __le32 *)
1888 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890 		new_fw_data = (const __le32 *)
1891 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892 	} else {
1893 		ucode_size = rdev->mc_fw->size / 4;
1894 
1895 		switch (rdev->family) {
1896 		case CHIP_BONAIRE:
1897 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899 			break;
1900 		case CHIP_HAWAII:
1901 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1903 			break;
1904 		default:
1905 			return -EINVAL;
1906 		}
1907 		fw_data = (const __be32 *)rdev->mc_fw->data;
1908 	}
1909 
1910 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911 
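	/* only (re)load the ucode when the MC isn't already running */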
1912 	if (running == 0) {
1918 		/* reset the engine and set to writable */
1919 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921 
1922 		/* load mc io regs */
1923 		for (i = 0; i < regs_size; i++) {
1924 			if (rdev->new_fw) {
1925 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927 			} else {
1928 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930 			}
1931 		}
1932 
1933 		tmp = RREG32(MC_SEQ_MISC0);
1934 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939 		}
1940 
1941 		/* load the MC ucode */
1942 		for (i = 0; i < ucode_size; i++) {
1943 			if (rdev->new_fw)
1944 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945 			else
1946 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947 		}
1948 
1949 		/* put the engine back into the active state */
1950 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953 
1954 		/* wait for training to complete */
1955 		for (i = 0; i < rdev->usec_timeout; i++) {
1956 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957 				break;
1958 			udelay(1);
1959 		}
1960 		for (i = 0; i < rdev->usec_timeout; i++) {
1961 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962 				break;
1963 			udelay(1);
1964 		}
1968 	}
1969 
1970 	return 0;
1971 }
1972 
1973 /**
1974  * cik_init_microcode - load ucode images from disk
1975  *
1976  * @rdev: radeon_device pointer
1977  *
1978  * Use the firmware interface to load the ucode images into
1979  * the driver (not loaded into hw).
1980  * Returns 0 on success, error on failure.
1981  */
1982 static int cik_init_microcode(struct radeon_device *rdev)
1983 {
1984 	const char *chip_name;
1985 	const char *new_chip_name;
1986 	size_t pfp_req_size, me_req_size, ce_req_size,
1987 		mec_req_size, rlc_req_size, mc_req_size = 0,
1988 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989 	char fw_name[30];
1990 	int new_fw = 0;
1991 	int err;
1992 	int num_fw;
1993 
1994 	DRM_DEBUG("\n");
1995 
1996 	switch (rdev->family) {
1997 	case CHIP_BONAIRE:
1998 		chip_name = "BONAIRE";
1999 		new_chip_name = "bonaire";
2000 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2002 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009 		num_fw = 8;
2010 		break;
2011 	case CHIP_HAWAII:
2012 		chip_name = "HAWAII";
2013 		new_chip_name = "hawaii";
2014 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023 		num_fw = 8;
2024 		break;
2025 	case CHIP_KAVERI:
2026 		chip_name = "KAVERI";
2027 		new_chip_name = "kaveri";
2028 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034 		num_fw = 7;
2035 		break;
2036 	case CHIP_KABINI:
2037 		chip_name = "KABINI";
2038 		new_chip_name = "kabini";
2039 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045 		num_fw = 6;
2046 		break;
2047 	case CHIP_MULLINS:
2048 		chip_name = "MULLINS";
2049 		new_chip_name = "mullins";
2050 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056 		num_fw = 6;
2057 		break;
2058 	default: BUG();
2059 	}
2060 
2061 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062 
2063 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065 	if (err) {
2066 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068 		if (err)
2069 			goto out;
2070 		if (rdev->pfp_fw->size != pfp_req_size) {
2071 			printk(KERN_ERR
2072 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073 			       rdev->pfp_fw->size, fw_name);
2074 			err = -EINVAL;
2075 			goto out;
2076 		}
2077 	} else {
2078 		err = radeon_ucode_validate(rdev->pfp_fw);
2079 		if (err) {
2080 			printk(KERN_ERR
2081 			       "cik_fw: validation failed for firmware \"%s\"\n",
2082 			       fw_name);
2083 			goto out;
2084 		} else {
2085 			new_fw++;
2086 		}
2087 	}
2088 
2089 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->me_fw->size != me_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->me_fw->size, fw_name);
2100 			err = -EINVAL;
			goto out;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->me_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->ce_fw->size != ce_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->ce_fw->size, fw_name);
2125 			err = -EINVAL;
			goto out;
2126 		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->ce_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141 	if (err) {
2142 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144 		if (err)
2145 			goto out;
2146 		if (rdev->mec_fw->size != mec_req_size) {
2147 			printk(KERN_ERR
2148 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149 			       rdev->mec_fw->size, fw_name);
2150 			err = -EINVAL;
			goto out;
2151 		}
2152 	} else {
2153 		err = radeon_ucode_validate(rdev->mec_fw);
2154 		if (err) {
2155 			printk(KERN_ERR
2156 			       "cik_fw: validation failed for firmware \"%s\"\n",
2157 			       fw_name);
2158 			goto out;
2159 		} else {
2160 			new_fw++;
2161 		}
2162 	}
2163 
2164 	if (rdev->family == CHIP_KAVERI) {
2165 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167 		if (err)
2168 			goto out;
2170 		err = radeon_ucode_validate(rdev->mec2_fw);
2171 		if (err)
2172 			goto out;
2174 		new_fw++;
2177 	}
2178 
2179 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181 	if (err) {
2182 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184 		if (err)
2185 			goto out;
2186 		if (rdev->rlc_fw->size != rlc_req_size) {
2187 			printk(KERN_ERR
2188 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189 			       rdev->rlc_fw->size, fw_name);
2190 			err = -EINVAL;
			goto out;
2191 		}
2192 	} else {
2193 		err = radeon_ucode_validate(rdev->rlc_fw);
2194 		if (err) {
2195 			printk(KERN_ERR
2196 			       "cik_fw: validation failed for firmware \"%s\"\n",
2197 			       fw_name);
2198 			goto out;
2199 		} else {
2200 			new_fw++;
2201 		}
2202 	}
2203 
2204 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206 	if (err) {
2207 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209 		if (err)
2210 			goto out;
2211 		if (rdev->sdma_fw->size != sdma_req_size) {
2212 			printk(KERN_ERR
2213 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214 			       rdev->sdma_fw->size, fw_name);
2215 			err = -EINVAL;
			goto out;
2216 		}
2217 	} else {
2218 		err = radeon_ucode_validate(rdev->sdma_fw);
2219 		if (err) {
2220 			printk(KERN_ERR
2221 			       "cik_fw: validation failed for firmware \"%s\"\n",
2222 			       fw_name);
2223 			goto out;
2224 		} else {
2225 			new_fw++;
2226 		}
2227 	}
2228 
2229 	/* No SMC, MC ucode on APUs */
2230 	if (!(rdev->flags & RADEON_IS_IGP)) {
2231 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233 		if (err) {
2234 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236 			if (err) {
2237 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239 				if (err)
2240 					goto out;
2241 			}
2242 			if ((rdev->mc_fw->size != mc_req_size) &&
2243 			    (rdev->mc_fw->size != mc2_req_size)){
2244 				printk(KERN_ERR
2245 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246 				       rdev->mc_fw->size, fw_name);
2247 				err = -EINVAL;
				goto out;
2248 			}
2249 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250 		} else {
2251 			err = radeon_ucode_validate(rdev->mc_fw);
2252 			if (err) {
2253 				printk(KERN_ERR
2254 				       "cik_fw: validation failed for firmware \"%s\"\n",
2255 				       fw_name);
2256 				goto out;
2257 			} else {
2258 				new_fw++;
2259 			}
2260 		}
2261 
2262 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264 		if (err) {
2265 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267 			if (err) {
2268 				printk(KERN_ERR
2269 				       "smc: error loading firmware \"%s\"\n",
2270 				       fw_name);
2271 				release_firmware(rdev->smc_fw);
2272 				rdev->smc_fw = NULL;
2273 				err = 0;
2274 			} else if (rdev->smc_fw->size != smc_req_size) {
2275 				printk(KERN_ERR
2276 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277 				       rdev->smc_fw->size, fw_name);
2278 				err = -EINVAL;
2279 			}
2280 		} else {
2281 			err = radeon_ucode_validate(rdev->smc_fw);
2282 			if (err) {
2283 				printk(KERN_ERR
2284 				       "cik_fw: validation failed for firmware \"%s\"\n",
2285 				       fw_name);
2286 				goto out;
2287 			} else {
2288 				new_fw++;
2289 			}
2290 		}
2291 	}
2292 
2293 	if (new_fw == 0) {
2294 		rdev->new_fw = false;
2295 	} else if (new_fw < num_fw) {
2296 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297 		err = -EINVAL;
2298 	} else {
2299 		rdev->new_fw = true;
2300 	}
2301 
2302 out:
2303 	if (err) {
2304 		if (err != -EINVAL)
2305 			printk(KERN_ERR
2306 			       "cik_cp: Failed to load firmware \"%s\"\n",
2307 			       fw_name);
2308 		release_firmware(rdev->pfp_fw);
2309 		rdev->pfp_fw = NULL;
2310 		release_firmware(rdev->me_fw);
2311 		rdev->me_fw = NULL;
2312 		release_firmware(rdev->ce_fw);
2313 		rdev->ce_fw = NULL;
2314 		release_firmware(rdev->mec_fw);
2315 		rdev->mec_fw = NULL;
2316 		release_firmware(rdev->mec2_fw);
2317 		rdev->mec2_fw = NULL;
2318 		release_firmware(rdev->rlc_fw);
2319 		rdev->rlc_fw = NULL;
2320 		release_firmware(rdev->sdma_fw);
2321 		rdev->sdma_fw = NULL;
2322 		release_firmware(rdev->mc_fw);
2323 		rdev->mc_fw = NULL;
2324 		release_firmware(rdev->smc_fw);
2325 		rdev->smc_fw = NULL;
2326 	}
2327 	return err;
2328 }
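
/*
 * Note: the loader above always tries the newer lower-case firmware
 * names first (header-validated by radeon_ucode_validate()) and falls
 * back to the legacy upper-case blobs with hardcoded sizes; mixing the
 * two generations is rejected so later setup code sees one consistent
 * ucode format.
 */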
2329 
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346 	u32 *tile = rdev->config.cik.tile_mode_array;
2347 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2348 	const u32 num_tile_mode_states =
2349 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2350 	const u32 num_secondary_tile_mode_states =
2351 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2352 	u32 reg_offset, split_equal_to_row_size;
2353 	u32 num_pipe_configs;
2354 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2355 		rdev->config.cik.max_shader_engines;
2356 
2357 	switch (rdev->config.cik.mem_row_size_in_kb) {
2358 	case 1:
2359 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2360 		break;
2361 	case 2:
2362 	default:
2363 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2364 		break;
2365 	case 4:
2366 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2367 		break;
2368 	}
2369 
2370 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
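	/*
	 * Only 2/4/8/16-pipe tables exist below; anything over 8
	 * pipes (Hawaii) is programmed with the 16-pipe set.
	 */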
2371 	if (num_pipe_configs > 8)
2372 		num_pipe_configs = 16;
2373 
2374 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2375 		tile[reg_offset] = 0;
2376 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2377 		macrotile[reg_offset] = 0;
2378 
2379 	switch (num_pipe_configs) {
2380 	case 16:
2381 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2383 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2385 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2389 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2393 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2397 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 			   TILE_SPLIT(split_equal_to_row_size));
2401 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2402 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2404 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2405 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2406 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2408 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2410 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 			   TILE_SPLIT(split_equal_to_row_size));
2412 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2413 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2414 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2417 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2424 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2432 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2434 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2445 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2446 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2447 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2449 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2454 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2456 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459 
2460 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2463 			   NUM_BANKS(ADDR_SURF_16_BANK));
2464 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467 			   NUM_BANKS(ADDR_SURF_16_BANK));
2468 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471 			   NUM_BANKS(ADDR_SURF_16_BANK));
2472 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475 			   NUM_BANKS(ADDR_SURF_16_BANK));
2476 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479 			   NUM_BANKS(ADDR_SURF_8_BANK));
2480 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 			   NUM_BANKS(ADDR_SURF_4_BANK));
2484 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487 			   NUM_BANKS(ADDR_SURF_2_BANK));
2488 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2490 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491 			   NUM_BANKS(ADDR_SURF_16_BANK));
2492 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2494 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495 			   NUM_BANKS(ADDR_SURF_16_BANK));
2496 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499 			    NUM_BANKS(ADDR_SURF_16_BANK));
2500 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503 			    NUM_BANKS(ADDR_SURF_8_BANK));
2504 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507 			    NUM_BANKS(ADDR_SURF_4_BANK));
2508 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511 			    NUM_BANKS(ADDR_SURF_2_BANK));
2512 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515 			    NUM_BANKS(ADDR_SURF_2_BANK));
2516 
2517 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2518 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2519 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2520 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2521 		break;
2522 
2523 	case 8:
2524 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2528 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2532 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2536 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2540 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 			   TILE_SPLIT(split_equal_to_row_size));
2544 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2547 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2548 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2549 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2551 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2552 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 			   TILE_SPLIT(split_equal_to_row_size));
2555 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2556 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2557 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2560 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2562 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2565 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2569 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2573 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2575 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2590 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2594 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2595 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2597 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602 
2603 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2605 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2606 				NUM_BANKS(ADDR_SURF_16_BANK));
2607 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2609 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2610 				NUM_BANKS(ADDR_SURF_16_BANK));
2611 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2613 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614 				NUM_BANKS(ADDR_SURF_16_BANK));
2615 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618 				NUM_BANKS(ADDR_SURF_16_BANK));
2619 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2622 				NUM_BANKS(ADDR_SURF_8_BANK));
2623 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626 				NUM_BANKS(ADDR_SURF_4_BANK));
2627 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630 				NUM_BANKS(ADDR_SURF_2_BANK));
2631 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2633 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634 				NUM_BANKS(ADDR_SURF_16_BANK));
2635 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638 				NUM_BANKS(ADDR_SURF_16_BANK));
2639 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2641 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2642 				NUM_BANKS(ADDR_SURF_16_BANK));
2643 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646 				NUM_BANKS(ADDR_SURF_16_BANK));
2647 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2650 				NUM_BANKS(ADDR_SURF_8_BANK));
2651 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654 				NUM_BANKS(ADDR_SURF_4_BANK));
2655 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658 				NUM_BANKS(ADDR_SURF_2_BANK));
2659 
2660 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2661 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2662 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2663 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2664 		break;
2665 
2666 	case 4:
2667 		if (num_rbs == 4) {
2668 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2672 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2676 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2680 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2684 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 			   TILE_SPLIT(split_equal_to_row_size));
2688 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2689 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2691 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2692 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2693 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2695 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698 			   TILE_SPLIT(split_equal_to_row_size));
2699 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2700 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2701 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2704 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2711 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2713 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2719 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2720 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2721 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2728 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2732 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2734 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2735 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2736 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2741 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2743 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746 
2747 		} else if (num_rbs < 4) {
2748 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2750 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2752 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2756 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2760 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2764 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 			   TILE_SPLIT(split_equal_to_row_size));
2768 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2769 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2771 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2772 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2773 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2775 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2776 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778 			   TILE_SPLIT(split_equal_to_row_size));
2779 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2780 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2781 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2784 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2786 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2788 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2793 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2797 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2799 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2801 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2808 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2814 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2815 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2816 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2818 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2819 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2823 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826 		}
2827 
2828 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2830 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2831 				NUM_BANKS(ADDR_SURF_16_BANK));
2832 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2834 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 				NUM_BANKS(ADDR_SURF_16_BANK));
2836 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839 				NUM_BANKS(ADDR_SURF_16_BANK));
2840 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843 				NUM_BANKS(ADDR_SURF_16_BANK));
2844 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847 				NUM_BANKS(ADDR_SURF_16_BANK));
2848 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851 				NUM_BANKS(ADDR_SURF_8_BANK));
2852 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2855 				NUM_BANKS(ADDR_SURF_4_BANK));
2856 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2857 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2858 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859 				NUM_BANKS(ADDR_SURF_16_BANK));
2860 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863 				NUM_BANKS(ADDR_SURF_16_BANK));
2864 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867 				NUM_BANKS(ADDR_SURF_16_BANK));
2868 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2870 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871 				NUM_BANKS(ADDR_SURF_16_BANK));
2872 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2875 				NUM_BANKS(ADDR_SURF_16_BANK));
2876 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879 				NUM_BANKS(ADDR_SURF_8_BANK));
2880 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2883 				NUM_BANKS(ADDR_SURF_4_BANK));
2884 
2885 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2886 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2887 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2888 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2889 		break;
2890 
2891 	case 2:
2892 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894 			   PIPE_CONFIG(ADDR_SURF_P2) |
2895 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2896 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898 			   PIPE_CONFIG(ADDR_SURF_P2) |
2899 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2900 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902 			   PIPE_CONFIG(ADDR_SURF_P2) |
2903 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2904 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 			   PIPE_CONFIG(ADDR_SURF_P2) |
2907 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2908 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910 			   PIPE_CONFIG(ADDR_SURF_P2) |
2911 			   TILE_SPLIT(split_equal_to_row_size));
2912 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2913 			   PIPE_CONFIG(ADDR_SURF_P2) |
2914 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2916 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2917 			   PIPE_CONFIG(ADDR_SURF_P2) |
2918 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2919 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921 			   PIPE_CONFIG(ADDR_SURF_P2) |
2922 			   TILE_SPLIT(split_equal_to_row_size));
2923 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2924 			   PIPE_CONFIG(ADDR_SURF_P2));
2925 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927 			   PIPE_CONFIG(ADDR_SURF_P2));
2928 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2930 			    PIPE_CONFIG(ADDR_SURF_P2) |
2931 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934 			    PIPE_CONFIG(ADDR_SURF_P2) |
2935 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2937 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938 			    PIPE_CONFIG(ADDR_SURF_P2) |
2939 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941 			    PIPE_CONFIG(ADDR_SURF_P2) |
2942 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2943 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2944 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2945 			    PIPE_CONFIG(ADDR_SURF_P2) |
2946 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949 			    PIPE_CONFIG(ADDR_SURF_P2) |
2950 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2952 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953 			    PIPE_CONFIG(ADDR_SURF_P2) |
2954 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2956 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957 			    PIPE_CONFIG(ADDR_SURF_P2));
2958 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2959 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2960 			    PIPE_CONFIG(ADDR_SURF_P2) |
2961 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964 			    PIPE_CONFIG(ADDR_SURF_P2) |
2965 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2967 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968 			    PIPE_CONFIG(ADDR_SURF_P2) |
2969 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970 
2971 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2972 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2973 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2974 				NUM_BANKS(ADDR_SURF_16_BANK));
2975 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2977 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978 				NUM_BANKS(ADDR_SURF_16_BANK));
2979 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982 				NUM_BANKS(ADDR_SURF_16_BANK));
2983 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986 				NUM_BANKS(ADDR_SURF_16_BANK));
2987 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990 				NUM_BANKS(ADDR_SURF_16_BANK));
2991 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994 				NUM_BANKS(ADDR_SURF_16_BANK));
2995 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2998 				NUM_BANKS(ADDR_SURF_8_BANK));
2999 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3000 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3001 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002 				NUM_BANKS(ADDR_SURF_16_BANK));
3003 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3004 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3005 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006 				NUM_BANKS(ADDR_SURF_16_BANK));
3007 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3008 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010 				NUM_BANKS(ADDR_SURF_16_BANK));
3011 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3012 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3013 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014 				NUM_BANKS(ADDR_SURF_16_BANK));
3015 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018 				NUM_BANKS(ADDR_SURF_16_BANK));
3019 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022 				NUM_BANKS(ADDR_SURF_16_BANK));
3023 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3026 				NUM_BANKS(ADDR_SURF_8_BANK));
3027 
3028 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3029 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3030 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3031 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3032 		break;
3033 
3034 	default:
3035 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3036 	}
3037 }
3038 
3039 /**
3040  * cik_select_se_sh - select which SE, SH to address
3041  *
3042  * @rdev: radeon_device pointer
3043  * @se_num: shader engine to address
3044  * @sh_num: sh block to address
3045  *
3046  * Select which SE, SH combinations to address. Certain
3047  * registers are instanced per SE or SH.  0xffffffff means
3048  * broadcast to all SEs or SHs (CIK).
3049  */
3050 static void cik_select_se_sh(struct radeon_device *rdev,
3051 			     u32 se_num, u32 sh_num)
3052 {
3053 	u32 data = INSTANCE_BROADCAST_WRITES;
3054 
3055 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3056 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3057 	else if (se_num == 0xffffffff)
3058 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3059 	else if (sh_num == 0xffffffff)
3060 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3061 	else
3062 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3063 	WREG32(GRBM_GFX_INDEX, data);
3064 }
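
/*
 * Usage sketch: cik_setup_rb() below walks every SE/SH pair with
 *
 *	cik_select_se_sh(rdev, se, sh);		// address one instance
 *	... read instanced registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); // back to broadcast
 *
 * under rdev->grbm_idx_mutex so concurrent GRBM_GFX_INDEX users don't
 * trample each other's selection.
 */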
3065 
3066 /**
3067  * cik_create_bitmask - create a bitmask
3068  *
3069  * @bit_width: length of the mask
3070  *
3071  * Creates a variable-length bit mask (CIK).
3072  * Returns the bitmask.
3073  */
3074 static u32 cik_create_bitmask(u32 bit_width)
3075 {
3076 	u32 i, mask = 0;
3077 
3078 	for (i = 0; i < bit_width; i++) {
3079 		mask <<= 1;
3080 		mask |= 1;
3081 	}
3082 	return mask;
3083 }
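
/*
 * For example, cik_create_bitmask(4) == 0xf; the loop is just
 * ((1u << bit_width) - 1) for any width below 32.
 */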
3084 
3085 /**
3086  * cik_get_rb_disabled - computes the mask of disabled RBs
3087  *
3088  * @rdev: radeon_device pointer
3089  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3091  * @sh_per_se: number of SH blocks per SE for the asic
3092  *
3093  * Calculates the bitmask of disabled RBs (CIK).
3094  * Returns the disabled RB bitmask.
3095  */
3096 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3097 			      u32 max_rb_num_per_se,
3098 			      u32 sh_per_se)
3099 {
3100 	u32 data, mask;
3101 
3102 	data = RREG32(CC_RB_BACKEND_DISABLE);
3103 	if (data & 1)
3104 		data &= BACKEND_DISABLE_MASK;
3105 	else
3106 		data = 0;
3107 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3108 
3109 	data >>= BACKEND_DISABLE_SHIFT;
3110 
3111 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3112 
3113 	return data & mask;
3114 }
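
/*
 * Worked example: with 2 RBs per SE and 1 SH per SE (e.g. Bonaire),
 * the mask is cik_create_bitmask(2 / 1) == 0x3, so only the two
 * backend-disable bits belonging to the currently selected SE/SH
 * survive.
 */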
3115 
3116 /**
3117  * cik_setup_rb - setup the RBs on the asic
3118  *
3119  * @rdev: radeon_device pointer
3120  * @se_num: number of SEs (shader engines) for the asic
3121  * @sh_per_se: number of SH blocks per SE for the asic
3122  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3123  *
3124  * Configures per-SE/SH RB registers (CIK).
3125  */
3126 static void cik_setup_rb(struct radeon_device *rdev,
3127 			 u32 se_num, u32 sh_per_se,
3128 			 u32 max_rb_num_per_se)
3129 {
3130 	int i, j;
3131 	u32 data, mask;
3132 	u32 disabled_rbs = 0;
3133 	u32 enabled_rbs = 0;
3134 
3135 	mutex_lock(&rdev->grbm_idx_mutex);
3136 	for (i = 0; i < se_num; i++) {
3137 		for (j = 0; j < sh_per_se; j++) {
3138 			cik_select_se_sh(rdev, i, j);
3139 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3140 			if (rdev->family == CHIP_HAWAII)
3141 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3142 			else
3143 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3144 		}
3145 	}
3146 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3147 	mutex_unlock(&rdev->grbm_idx_mutex);
3148 
3149 	mask = 1;
3150 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3151 		if (!(disabled_rbs & mask))
3152 			enabled_rbs |= mask;
3153 		mask <<= 1;
3154 	}
3155 
3156 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3157 
3158 	mutex_lock(&rdev->grbm_idx_mutex);
3159 	for (i = 0; i < se_num; i++) {
3160 		cik_select_se_sh(rdev, i, 0xffffffff);
3161 		data = 0;
3162 		for (j = 0; j < sh_per_se; j++) {
3163 			switch (enabled_rbs & 3) {
3164 			case 0:
3165 				if (j == 0)
3166 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3167 				else
3168 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3169 				break;
3170 			case 1:
3171 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3172 				break;
3173 			case 2:
3174 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3175 				break;
3176 			case 3:
3177 			default:
3178 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3179 				break;
3180 			}
3181 			enabled_rbs >>= 2;
3182 		}
3183 		WREG32(PA_SC_RASTER_CONFIG, data);
3184 	}
3185 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3186 	mutex_unlock(&rdev->grbm_idx_mutex);
3187 }
3188 
3189 /**
3190  * cik_gpu_init - setup the 3D engine
3191  *
3192  * @rdev: radeon_device pointer
3193  *
3194  * Configures the 3D engine and tiling configuration
3195  * registers so that the 3D engine is usable.
3196  */
3197 static void cik_gpu_init(struct radeon_device *rdev)
3198 {
3199 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3200 	u32 mc_shared_chmap, mc_arb_ramcfg;
3201 	u32 hdp_host_path_cntl;
3202 	u32 tmp;
3203 	int i, j;
3204 
3205 	switch (rdev->family) {
3206 	case CHIP_BONAIRE:
3207 		rdev->config.cik.max_shader_engines = 2;
3208 		rdev->config.cik.max_tile_pipes = 4;
3209 		rdev->config.cik.max_cu_per_sh = 7;
3210 		rdev->config.cik.max_sh_per_se = 1;
3211 		rdev->config.cik.max_backends_per_se = 2;
3212 		rdev->config.cik.max_texture_channel_caches = 4;
3213 		rdev->config.cik.max_gprs = 256;
3214 		rdev->config.cik.max_gs_threads = 32;
3215 		rdev->config.cik.max_hw_contexts = 8;
3216 
3217 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3218 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3219 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3220 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3221 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3222 		break;
3223 	case CHIP_HAWAII:
3224 		rdev->config.cik.max_shader_engines = 4;
3225 		rdev->config.cik.max_tile_pipes = 16;
3226 		rdev->config.cik.max_cu_per_sh = 11;
3227 		rdev->config.cik.max_sh_per_se = 1;
3228 		rdev->config.cik.max_backends_per_se = 4;
3229 		rdev->config.cik.max_texture_channel_caches = 16;
3230 		rdev->config.cik.max_gprs = 256;
3231 		rdev->config.cik.max_gs_threads = 32;
3232 		rdev->config.cik.max_hw_contexts = 8;
3233 
3234 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3235 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3236 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3237 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3238 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3239 		break;
3240 	case CHIP_KAVERI:
3241 		rdev->config.cik.max_shader_engines = 1;
3242 		rdev->config.cik.max_tile_pipes = 4;
3243 		if ((rdev->pdev->device == 0x1304) ||
3244 		    (rdev->pdev->device == 0x1305) ||
3245 		    (rdev->pdev->device == 0x130C) ||
3246 		    (rdev->pdev->device == 0x130F) ||
3247 		    (rdev->pdev->device == 0x1310) ||
3248 		    (rdev->pdev->device == 0x1311) ||
3249 		    (rdev->pdev->device == 0x131C)) {
3250 			rdev->config.cik.max_cu_per_sh = 8;
3251 			rdev->config.cik.max_backends_per_se = 2;
3252 		} else if ((rdev->pdev->device == 0x1309) ||
3253 			   (rdev->pdev->device == 0x130A) ||
3254 			   (rdev->pdev->device == 0x130D) ||
3255 			   (rdev->pdev->device == 0x1313) ||
3256 			   (rdev->pdev->device == 0x131D)) {
3257 			rdev->config.cik.max_cu_per_sh = 6;
3258 			rdev->config.cik.max_backends_per_se = 2;
3259 		} else if ((rdev->pdev->device == 0x1306) ||
3260 			   (rdev->pdev->device == 0x1307) ||
3261 			   (rdev->pdev->device == 0x130B) ||
3262 			   (rdev->pdev->device == 0x130E) ||
3263 			   (rdev->pdev->device == 0x1315) ||
3264 			   (rdev->pdev->device == 0x1318) ||
3265 			   (rdev->pdev->device == 0x131B)) {
3266 			rdev->config.cik.max_cu_per_sh = 4;
3267 			rdev->config.cik.max_backends_per_se = 1;
3268 		} else {
3269 			rdev->config.cik.max_cu_per_sh = 3;
3270 			rdev->config.cik.max_backends_per_se = 1;
3271 		}
3272 		rdev->config.cik.max_sh_per_se = 1;
3273 		rdev->config.cik.max_texture_channel_caches = 4;
3274 		rdev->config.cik.max_gprs = 256;
3275 		rdev->config.cik.max_gs_threads = 16;
3276 		rdev->config.cik.max_hw_contexts = 8;
3277 
3278 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3279 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3280 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3281 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3282 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3283 		break;
3284 	case CHIP_KABINI:
3285 	case CHIP_MULLINS:
3286 	default:
3287 		rdev->config.cik.max_shader_engines = 1;
3288 		rdev->config.cik.max_tile_pipes = 2;
3289 		rdev->config.cik.max_cu_per_sh = 2;
3290 		rdev->config.cik.max_sh_per_se = 1;
3291 		rdev->config.cik.max_backends_per_se = 1;
3292 		rdev->config.cik.max_texture_channel_caches = 2;
3293 		rdev->config.cik.max_gprs = 256;
3294 		rdev->config.cik.max_gs_threads = 16;
3295 		rdev->config.cik.max_hw_contexts = 8;
3296 
3297 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3298 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3299 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3300 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3301 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3302 		break;
3303 	}
3304 
3305 	/* Initialize HDP */
3306 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3307 		WREG32((0x2c14 + j), 0x00000000);
3308 		WREG32((0x2c18 + j), 0x00000000);
3309 		WREG32((0x2c1c + j), 0x00000000);
3310 		WREG32((0x2c20 + j), 0x00000000);
3311 		WREG32((0x2c24 + j), 0x00000000);
3312 	}
3313 
3314 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3315 	WREG32(SRBM_INT_CNTL, 0x1);
3316 	WREG32(SRBM_INT_ACK, 0x1);
3317 
3318 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3319 
3320 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3321 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3322 
3323 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3324 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3325 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3326 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3327 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3328 		rdev->config.cik.mem_row_size_in_kb = 4;
3329 	/* XXX use MC settings? */
3330 	rdev->config.cik.shader_engine_tile_size = 32;
3331 	rdev->config.cik.num_gpus = 1;
3332 	rdev->config.cik.multi_gpu_tile_size = 64;
3333 
3334 	/* fix up row size */
3335 	gb_addr_config &= ~ROW_SIZE_MASK;
3336 	switch (rdev->config.cik.mem_row_size_in_kb) {
3337 	case 1:
3338 	default:
3339 		gb_addr_config |= ROW_SIZE(0);
3340 		break;
3341 	case 2:
3342 		gb_addr_config |= ROW_SIZE(1);
3343 		break;
3344 	case 4:
3345 		gb_addr_config |= ROW_SIZE(2);
3346 		break;
3347 	}
3348 
3349 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3350 	 * not have bank info, so create a custom tiling dword.
3351 	 * bits 3:0   num_pipes
3352 	 * bits 7:4   num_banks
3353 	 * bits 11:8  group_size
3354 	 * bits 15:12 row_size
3355 	 */
3356 	rdev->config.cik.tile_config = 0;
3357 	switch (rdev->config.cik.num_tile_pipes) {
3358 	case 1:
3359 		rdev->config.cik.tile_config |= (0 << 0);
3360 		break;
3361 	case 2:
3362 		rdev->config.cik.tile_config |= (1 << 0);
3363 		break;
3364 	case 4:
3365 		rdev->config.cik.tile_config |= (2 << 0);
3366 		break;
3367 	case 8:
3368 	default:
3369 		/* XXX what about 12? */
3370 		rdev->config.cik.tile_config |= (3 << 0);
3371 		break;
3372 	}
3373 	rdev->config.cik.tile_config |=
3374 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3375 	rdev->config.cik.tile_config |=
3376 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3377 	rdev->config.cik.tile_config |=
3378 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
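	/*
	 * Decoding the two fields derived from the switch statements above
	 * (for the cases handled there): num_pipes = 1 << (tile_config & 0xf)
	 * recovers the 1/2/4/8 pipe count, and
	 * row_size_in_kb = 1 << ((tile_config >> 12) & 0xf) recovers the
	 * 1/2/4 KB row size.
	 */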
3379 
3380 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3381 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3382 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3383 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3384 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3385 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3386 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3387 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3388 
3389 	cik_tiling_mode_table_init(rdev);
3390 
3391 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3392 		     rdev->config.cik.max_sh_per_se,
3393 		     rdev->config.cik.max_backends_per_se);
3394 
3395 	rdev->config.cik.active_cus = 0;
3396 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3397 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3398 			rdev->config.cik.active_cus +=
3399 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3400 		}
3401 	}
3402 
3403 	/* set HW defaults for 3D engine */
3404 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3405 
3406 	mutex_lock(&rdev->grbm_idx_mutex);
3407 	/*
3408 	 * making sure that the following register writes will be broadcasted
3409 	 * to all the shaders
3410 	 */
3411 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3412 	WREG32(SX_DEBUG_1, 0x20);
3413 
3414 	WREG32(TA_CNTL_AUX, 0x00010000);
3415 
3416 	tmp = RREG32(SPI_CONFIG_CNTL);
3417 	tmp |= 0x03000000;
3418 	WREG32(SPI_CONFIG_CNTL, tmp);
3419 
3420 	WREG32(SQ_CONFIG, 1);
3421 
3422 	WREG32(DB_DEBUG, 0);
3423 
3424 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3425 	tmp |= 0x00000400;
3426 	WREG32(DB_DEBUG2, tmp);
3427 
3428 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3429 	tmp |= 0x00020200;
3430 	WREG32(DB_DEBUG3, tmp);
3431 
3432 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3433 	tmp |= 0x00018208;
3434 	WREG32(CB_HW_CONTROL, tmp);
3435 
3436 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3437 
3438 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3439 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3440 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3441 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3442 
3443 	WREG32(VGT_NUM_INSTANCES, 1);
3444 
3445 	WREG32(CP_PERFMON_CNTL, 0);
3446 
3447 	WREG32(SQ_CONFIG, 0);
3448 
3449 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3450 					  FORCE_EOV_MAX_REZ_CNT(255)));
3451 
3452 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3453 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3454 
3455 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3456 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3457 
3458 	tmp = RREG32(HDP_MISC_CNTL);
3459 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3460 	WREG32(HDP_MISC_CNTL, tmp);
3461 
3462 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3463 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3464 
3465 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3466 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3467 	mutex_unlock(&rdev->grbm_idx_mutex);
3468 
3469 	udelay(50);
3470 }
3471 
3472 /*
3473  * GPU scratch registers helpers function.
3474  */
3475 /**
3476  * cik_scratch_init - setup driver info for CP scratch regs
3477  *
3478  * @rdev: radeon_device pointer
3479  *
3480  * Set up the number and offset of the CP scratch registers.
3481  * NOTE: use of CP scratch registers is a legacy interface and
3482  * is not used by default on newer asics (r6xx+).  On newer asics,
3483  * memory buffers are used for fences rather than scratch regs.
3484  */
3485 static void cik_scratch_init(struct radeon_device *rdev)
3486 {
3487 	int i;
3488 
3489 	rdev->scratch.num_reg = 7;
3490 	rdev->scratch.reg_base = SCRATCH_REG0;
3491 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3492 		rdev->scratch.free[i] = true;
3493 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3494 	}
3495 }
3496 
3497 /**
3498  * cik_ring_test - basic gfx ring test
3499  *
3500  * @rdev: radeon_device pointer
3501  * @ring: radeon_ring structure holding ring information
3502  *
3503  * Allocate a scratch register and write to it using the gfx ring (CIK).
3504  * Provides a basic gfx ring test to verify that the ring is working.
3505  * Used by cik_cp_gfx_resume().
3506  * Returns 0 on success, error on failure.
3507  */
3508 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3509 {
3510 	uint32_t scratch;
3511 	uint32_t tmp = 0;
3512 	unsigned i;
3513 	int r;
3514 
3515 	r = radeon_scratch_get(rdev, &scratch);
3516 	if (r) {
3517 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3518 		return r;
3519 	}
3520 	WREG32(scratch, 0xCAFEDEAD);
3521 	r = radeon_ring_lock(rdev, ring, 3);
3522 	if (r) {
3523 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3524 		radeon_scratch_free(rdev, scratch);
3525 		return r;
3526 	}
3527 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3528 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3529 	radeon_ring_write(ring, 0xDEADBEEF);
3530 	radeon_ring_unlock_commit(rdev, ring, false);
3531 
3532 	for (i = 0; i < rdev->usec_timeout; i++) {
3533 		tmp = RREG32(scratch);
3534 		if (tmp == 0xDEADBEEF)
3535 			break;
3536 		DRM_UDELAY(1);
3537 	}
3538 	if (i < rdev->usec_timeout) {
3539 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3540 	} else {
3541 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3542 			  ring->idx, scratch, tmp);
3543 		r = -EINVAL;
3544 	}
3545 	radeon_scratch_free(rdev, scratch);
3546 	return r;
3547 }
3548 
3549 /**
3550  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3551  *
3552  * @rdev: radeon_device pointer
3553  * @ridx: radeon ring index
3554  *
3555  * Emits an hdp flush on the cp.
3556  */
3557 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3558 				       int ridx)
3559 {
3560 	struct radeon_ring *ring = &rdev->ring[ridx];
3561 	u32 ref_and_mask;
3562 
3563 	switch (ring->idx) {
3564 	case CAYMAN_RING_TYPE_CP1_INDEX:
3565 	case CAYMAN_RING_TYPE_CP2_INDEX:
3566 	default:
3567 		switch (ring->me) {
3568 		case 0:
3569 			ref_and_mask = CP2 << ring->pipe;
3570 			break;
3571 		case 1:
3572 			ref_and_mask = CP6 << ring->pipe;
3573 			break;
3574 		default:
3575 			return;
3576 		}
3577 		break;
3578 	case RADEON_RING_TYPE_GFX_INDEX:
3579 		ref_and_mask = CP0;
3580 		break;
3581 	}
3582 
3583 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3584 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3585 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3586 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3587 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3588 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3589 	radeon_ring_write(ring, ref_and_mask);
3590 	radeon_ring_write(ring, ref_and_mask);
3591 	radeon_ring_write(ring, 0x20); /* poll interval */
3592 }
3593 
3594 /**
3595  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3596  *
3597  * @rdev: radeon_device pointer
3598  * @fence: radeon fence object
3599  *
3600  * Emits a fence sequence number on the gfx ring and flushes
3601  * GPU caches.
3602  */
3603 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3604 			     struct radeon_fence *fence)
3605 {
3606 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3607 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3608 
3609 	/* Workaround for cache flush problems: first send a dummy EOP
3610 	 * event down the pipe with a sequence number one below the real one.
3611 	 */
3612 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3613 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3614 				 EOP_TC_ACTION_EN |
3615 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3616 				 EVENT_INDEX(5)));
3617 	radeon_ring_write(ring, addr & 0xfffffffc);
3618 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3619 				DATA_SEL(1) | INT_SEL(0));
3620 	radeon_ring_write(ring, fence->seq - 1);
3621 	radeon_ring_write(ring, 0);
3622 
3623 	/* Then send the real EOP event down the pipe. */
3624 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3625 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3626 				 EOP_TC_ACTION_EN |
3627 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3628 				 EVENT_INDEX(5)));
3629 	radeon_ring_write(ring, addr & 0xfffffffc);
3630 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3631 	radeon_ring_write(ring, fence->seq);
3632 	radeon_ring_write(ring, 0);
3633 }
3634 
3635 /**
3636  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3637  *
3638  * @rdev: radeon_device pointer
3639  * @fence: radeon fence object
3640  *
3641  * Emits a fence sequence number on the compute ring and flushes
3642  * GPU caches.
3643  */
3644 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3645 				 struct radeon_fence *fence)
3646 {
3647 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3648 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3649 
3650 	/* RELEASE_MEM - flush caches, send int */
3651 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3652 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3653 				 EOP_TC_ACTION_EN |
3654 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3655 				 EVENT_INDEX(5)));
3656 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3657 	radeon_ring_write(ring, addr & 0xfffffffc);
3658 	radeon_ring_write(ring, upper_32_bits(addr));
3659 	radeon_ring_write(ring, fence->seq);
3660 	radeon_ring_write(ring, 0);
3661 }
3662 
3663 /**
3664  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3665  *
3666  * @rdev: radeon_device pointer
3667  * @ring: radeon ring buffer object
3668  * @semaphore: radeon semaphore object
3669  * @emit_wait: Is this a semaphore wait?
3670  *
3671  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3672  * from running ahead of semaphore waits.
3673  */
3674 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3675 			     struct radeon_ring *ring,
3676 			     struct radeon_semaphore *semaphore,
3677 			     bool emit_wait)
3678 {
3679 	uint64_t addr = semaphore->gpu_addr;
3680 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3681 
3682 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3683 	radeon_ring_write(ring, lower_32_bits(addr));
3684 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3685 
3686 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3687 		/* Prevent the PFP from running ahead of the semaphore wait */
3688 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3689 		radeon_ring_write(ring, 0x0);
3690 	}
3691 
3692 	return true;
3693 }
3694 
3695 /**
3696  * cik_copy_cpdma - copy pages using the CP DMA engine
3697  *
3698  * @rdev: radeon_device pointer
3699  * @src_offset: src GPU address
3700  * @dst_offset: dst GPU address
3701  * @num_gpu_pages: number of GPU pages to xfer
3702  * @resv: reservation object to sync to
3703  *
3704  * Copy GPU pages using the CP DMA engine (CIK+).
3705  * Used by the radeon ttm implementation to move pages if
3706  * registered as the asic copy callback.  Returns a fence for
3707  * the copy, or an ERR_PTR on failure.
 */
3708 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3709 				    uint64_t src_offset, uint64_t dst_offset,
3710 				    unsigned num_gpu_pages,
3711 				    struct reservation_object *resv)
3712 {
3713 	struct radeon_fence *fence;
3714 	struct radeon_sync sync;
3715 	int ring_index = rdev->asic->copy.blit_ring_index;
3716 	struct radeon_ring *ring = &rdev->ring[ring_index];
3717 	u32 size_in_bytes, cur_size_in_bytes, control;
3718 	int i, num_loops;
3719 	int r = 0;
3720 
3721 	radeon_sync_create(&sync);
3722 
3723 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3724 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
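	/*
	 * Ring-space budget: each loop iteration below emits one 7-dword
	 * DMA_DATA packet (header, control, two 64-bit addresses, size);
	 * the extra 18 dwords leave room for the sync and fence packets
	 * emitted inside the same ring reservation.
	 */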
3725 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3726 	if (r) {
3727 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3728 		radeon_sync_free(rdev, &sync, NULL);
3729 		return ERR_PTR(r);
3730 	}
3731 
3732 	radeon_sync_resv(rdev, &sync, resv, false);
3733 	radeon_sync_rings(rdev, &sync, ring->idx);
3734 
3735 	for (i = 0; i < num_loops; i++) {
3736 		cur_size_in_bytes = size_in_bytes;
3737 		if (cur_size_in_bytes > 0x1fffff)
3738 			cur_size_in_bytes = 0x1fffff;
3739 		size_in_bytes -= cur_size_in_bytes;
3740 		control = 0;
3741 		if (size_in_bytes == 0)
3742 			control |= PACKET3_DMA_DATA_CP_SYNC;
3743 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3744 		radeon_ring_write(ring, control);
3745 		radeon_ring_write(ring, lower_32_bits(src_offset));
3746 		radeon_ring_write(ring, upper_32_bits(src_offset));
3747 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3748 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3749 		radeon_ring_write(ring, cur_size_in_bytes);
3750 		src_offset += cur_size_in_bytes;
3751 		dst_offset += cur_size_in_bytes;
3752 	}
3753 
3754 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3755 	if (r) {
3756 		radeon_ring_unlock_undo(rdev, ring);
3757 		radeon_sync_free(rdev, &sync, NULL);
3758 		return ERR_PTR(r);
3759 	}
3760 
3761 	radeon_ring_unlock_commit(rdev, ring, false);
3762 	radeon_sync_free(rdev, &sync, fence);
3763 
3764 	return fence;
3765 }
3766 
3767 /*
3768  * IB stuff
3769  */
3770 /**
3771  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3772  *
3773  * @rdev: radeon_device pointer
3774  * @ib: radeon indirect buffer object
3775  *
3776  * Emits a DE (drawing engine) or CE (constant engine) IB
3777  * on the gfx ring.  IBs are usually generated by userspace
3778  * acceleration drivers and submitted to the kernel for
3779  * scheduling on the ring.  This function schedules the IB
3780  * on the gfx ring for execution by the GPU.
3781  */
3782 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3783 {
3784 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3785 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3786 	u32 header, control = INDIRECT_BUFFER_VALID;
3787 
3788 	if (ib->is_const_ib) {
3789 		/* set switch buffer packet before const IB */
3790 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3791 		radeon_ring_write(ring, 0);
3792 
3793 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3794 	} else {
3795 		u32 next_rptr;
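		/*
		 * next_rptr must point just past this IB's packet: 3 dwords
		 * for the SET_UCONFIG_REG write (or 5 for WRITE_DATA) plus
		 * 4 dwords for the INDIRECT_BUFFER packet emitted below.
		 */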
3796 		if (ring->rptr_save_reg) {
3797 			next_rptr = ring->wptr + 3 + 4;
3798 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3799 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3800 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3801 			radeon_ring_write(ring, next_rptr);
3802 		} else if (rdev->wb.enabled) {
3803 			next_rptr = ring->wptr + 5 + 4;
3804 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3805 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3806 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3807 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3808 			radeon_ring_write(ring, next_rptr);
3809 		}
3810 
3811 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3812 	}
3813 
3814 	control |= ib->length_dw | (vm_id << 24);
3815 
3816 	radeon_ring_write(ring, header);
3817 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3818 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3819 	radeon_ring_write(ring, control);
3820 }
3821 
3822 /**
3823  * cik_ib_test - basic gfx ring IB test
3824  *
3825  * @rdev: radeon_device pointer
3826  * @ring: radeon_ring structure holding ring information
3827  *
3828  * Allocate an IB and execute it on the gfx ring (CIK).
3829  * Provides a basic gfx ring test to verify that IBs are working.
3830  * Returns 0 on success, error on failure.
3831  */
3832 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3833 {
3834 	struct radeon_ib ib;
3835 	uint32_t scratch;
3836 	uint32_t tmp = 0;
3837 	unsigned i;
3838 	int r;
3839 
3840 	r = radeon_scratch_get(rdev, &scratch);
3841 	if (r) {
3842 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3843 		return r;
3844 	}
3845 	WREG32(scratch, 0xCAFEDEAD);
3846 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3847 	if (r) {
3848 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3849 		radeon_scratch_free(rdev, scratch);
3850 		return r;
3851 	}
3852 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3853 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3854 	ib.ptr[2] = 0xDEADBEEF;
3855 	ib.length_dw = 3;
3856 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3857 	if (r) {
3858 		radeon_scratch_free(rdev, scratch);
3859 		radeon_ib_free(rdev, &ib);
3860 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3861 		return r;
3862 	}
3863 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3864 		RADEON_USEC_IB_TEST_TIMEOUT));
3865 	if (r < 0) {
3866 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3867 		radeon_scratch_free(rdev, scratch);
3868 		radeon_ib_free(rdev, &ib);
3869 		return r;
3870 	} else if (r == 0) {
3871 		DRM_ERROR("radeon: fence wait timed out.\n");
3872 		radeon_scratch_free(rdev, scratch);
3873 		radeon_ib_free(rdev, &ib);
3874 		return -ETIMEDOUT;
3875 	}
3876 	r = 0;
3877 	for (i = 0; i < rdev->usec_timeout; i++) {
3878 		tmp = RREG32(scratch);
3879 		if (tmp == 0xDEADBEEF)
3880 			break;
3881 		DRM_UDELAY(1);
3882 	}
3883 	if (i < rdev->usec_timeout) {
3884 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3885 	} else {
3886 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3887 			  scratch, tmp);
3888 		r = -EINVAL;
3889 	}
3890 	radeon_scratch_free(rdev, scratch);
3891 	radeon_ib_free(rdev, &ib);
3892 	return r;
3893 }
3894 
3895 /*
3896  * CP.
3897  * On CIK, gfx and compute now have independent command processors.
3898  *
3899  * GFX
3900  * Gfx consists of a single ring and can process both gfx jobs and
3901  * compute jobs.  The gfx CP consists of three microengines (ME):
3902  * PFP - Pre-Fetch Parser
3903  * ME - Micro Engine
3904  * CE - Constant Engine
3905  * The PFP and ME make up what is considered the Drawing Engine (DE).
3906  * The CE is an asynchronous engine used for updating buffer descriptors
3907  * used by the DE so that they can be loaded into cache in parallel
3908  * while the DE is processing state update packets.
3909  *
3910  * Compute
3911  * The compute CP consists of two microengines (ME):
3912  * MEC1 - Compute MicroEngine 1
3913  * MEC2 - Compute MicroEngine 2
3914  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3915  * The queues are exposed to userspace and are programmed directly
3916  * by the compute runtime.
3917  */
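/*
 * For example, with 4 pipes per MEC and 8 queues per pipe, a 2-MEC part
 * such as Kaveri exposes 2 * 4 * 8 = 64 compute queues, while the 1-MEC
 * parts expose 32; see the breakdown in cik_mec_init() below.
 */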
3918 /**
3919  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3920  *
3921  * @rdev: radeon_device pointer
3922  * @enable: enable or disable the MEs
3923  *
3924  * Halts or unhalts the gfx MEs.
3925  */
3926 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3927 {
3928 	if (enable)
3929 		WREG32(CP_ME_CNTL, 0);
3930 	else {
3931 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3932 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3933 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3934 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3935 	}
3936 	udelay(50);
3937 }
3938 
3939 /**
3940  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3941  *
3942  * @rdev: radeon_device pointer
3943  *
3944  * Loads the gfx PFP, ME, and CE ucode.
3945  * Returns 0 for success, -EINVAL if the ucode is not available.
3946  */
3947 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3948 {
3949 	int i;
3950 
3951 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3952 		return -EINVAL;
3953 
3954 	cik_cp_gfx_enable(rdev, false);
3955 
3956 	if (rdev->new_fw) {
3957 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3958 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3959 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3960 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3961 		const struct gfx_firmware_header_v1_0 *me_hdr =
3962 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3963 		const __le32 *fw_data;
3964 		u32 fw_size;
3965 
3966 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3967 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3968 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3969 
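		/*
		 * Each engine's ucode load follows the same pattern: reset
		 * the auto-incrementing UCODE_ADDR register to 0, stream the
		 * image through UCODE_DATA, then write the ucode version
		 * back to UCODE_ADDR (the legacy path below writes 0).
		 */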
3970 		/* PFP */
3971 		fw_data = (const __le32 *)
3972 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3973 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3974 		WREG32(CP_PFP_UCODE_ADDR, 0);
3975 		for (i = 0; i < fw_size; i++)
3976 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3977 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3978 
3979 		/* CE */
3980 		fw_data = (const __le32 *)
3981 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3982 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3983 		WREG32(CP_CE_UCODE_ADDR, 0);
3984 		for (i = 0; i < fw_size; i++)
3985 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3986 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3987 
3988 		/* ME */
3989 		fw_data = (const __le32 *)
3990 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3991 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3992 		WREG32(CP_ME_RAM_WADDR, 0);
3993 		for (i = 0; i < fw_size; i++)
3994 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3995 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3996 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3997 	} else {
3998 		const __be32 *fw_data;
3999 
4000 		/* PFP */
4001 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4002 		WREG32(CP_PFP_UCODE_ADDR, 0);
4003 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4004 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4005 		WREG32(CP_PFP_UCODE_ADDR, 0);
4006 
4007 		/* CE */
4008 		fw_data = (const __be32 *)rdev->ce_fw->data;
4009 		WREG32(CP_CE_UCODE_ADDR, 0);
4010 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4011 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4012 		WREG32(CP_CE_UCODE_ADDR, 0);
4013 
4014 		/* ME */
4015 		fw_data = (const __be32 *)rdev->me_fw->data;
4016 		WREG32(CP_ME_RAM_WADDR, 0);
4017 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4018 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4019 		WREG32(CP_ME_RAM_WADDR, 0);
4020 	}
4021 
4022 	return 0;
4023 }
4024 
4025 /**
4026  * cik_cp_gfx_start - start the gfx ring
4027  *
4028  * @rdev: radeon_device pointer
4029  *
4030  * Enables the ring and loads the clear state context and other
4031  * packets required to init the ring.
4032  * Returns 0 for success, error for failure.
4033  */
4034 static int cik_cp_gfx_start(struct radeon_device *rdev)
4035 {
4036 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4037 	int r, i;
4038 
4039 	/* init the CP */
4040 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4041 	WREG32(CP_ENDIAN_SWAP, 0);
4042 	WREG32(CP_DEVICE_ID, 1);
4043 
4044 	cik_cp_gfx_enable(rdev, true);
4045 
4046 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4047 	if (r) {
4048 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4049 		return r;
4050 	}
4051 
4052 	/* init the CE partitions.  CE only used for gfx on CIK */
4053 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4054 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4055 	radeon_ring_write(ring, 0x8000);
4056 	radeon_ring_write(ring, 0x8000);
4057 
4058 	/* setup clear context state */
4059 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4060 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4061 
4062 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4063 	radeon_ring_write(ring, 0x80000000);
4064 	radeon_ring_write(ring, 0x80000000);
4065 
4066 	for (i = 0; i < cik_default_size; i++)
4067 		radeon_ring_write(ring, cik_default_state[i]);
4068 
4069 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4070 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4071 
4072 	/* set clear context state */
4073 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4074 	radeon_ring_write(ring, 0);
4075 
4076 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4077 	radeon_ring_write(ring, 0x00000316);
4078 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4079 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4080 
4081 	radeon_ring_unlock_commit(rdev, ring, false);
4082 
4083 	return 0;
4084 }
4085 
4086 /**
4087  * cik_cp_gfx_fini - stop the gfx ring
4088  *
4089  * @rdev: radeon_device pointer
4090  *
4091  * Stop the gfx ring and tear down the driver ring
4092  * info.
4093  */
4094 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4095 {
4096 	cik_cp_gfx_enable(rdev, false);
4097 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4098 }
4099 
4100 /**
4101  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4102  *
4103  * @rdev: radeon_device pointer
4104  *
4105  * Program the location and size of the gfx ring buffer
4106  * and test it to make sure it's working.
4107  * Returns 0 for success, error for failure.
4108  */
4109 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4110 {
4111 	struct radeon_ring *ring;
4112 	u32 tmp;
4113 	u32 rb_bufsz;
4114 	u64 rb_addr;
4115 	int r;
4116 
4117 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4118 	if (rdev->family != CHIP_HAWAII)
4119 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4120 
4121 	/* Set the write pointer delay */
4122 	WREG32(CP_RB_WPTR_DELAY, 0);
4123 
4124 	/* set the RB to use vmid 0 */
4125 	WREG32(CP_RB_VMID, 0);
4126 
4127 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4128 
4129 	/* ring 0 - compute and gfx */
4130 	/* Set ring buffer size */
4131 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
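	/* the rb_bufsz field encodes log2 of the ring size in 8-byte units */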
4132 	rb_bufsz = order_base_2(ring->ring_size / 8);
4133 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4134 #ifdef __BIG_ENDIAN
4135 	tmp |= BUF_SWAP_32BIT;
4136 #endif
4137 	WREG32(CP_RB0_CNTL, tmp);
4138 
4139 	/* Initialize the ring buffer's read and write pointers */
4140 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4141 	ring->wptr = 0;
4142 	WREG32(CP_RB0_WPTR, ring->wptr);
4143 
4144 	/* set the wb address whether it's enabled or not */
4145 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4146 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4147 
4148 	/* scratch register shadowing is no longer supported */
4149 	WREG32(SCRATCH_UMSK, 0);
4150 
4151 	if (!rdev->wb.enabled)
4152 		tmp |= RB_NO_UPDATE;
4153 
4154 	mdelay(1);
4155 	WREG32(CP_RB0_CNTL, tmp);
4156 
4157 	rb_addr = ring->gpu_addr >> 8;
4158 	WREG32(CP_RB0_BASE, rb_addr);
4159 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4160 
4161 	/* start the ring */
4162 	cik_cp_gfx_start(rdev);
4163 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4164 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4165 	if (r) {
4166 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4167 		return r;
4168 	}
4169 
4170 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4171 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4172 
4173 	return 0;
4174 }
4175 
4176 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4177 		     struct radeon_ring *ring)
4178 {
4179 	u32 rptr;
4180 
4181 	if (rdev->wb.enabled)
4182 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4183 	else
4184 		rptr = RREG32(CP_RB0_RPTR);
4185 
4186 	return rptr;
4187 }
4188 
4189 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4190 		     struct radeon_ring *ring)
4191 {
4192 	u32 wptr;
4193 
4194 	wptr = RREG32(CP_RB0_WPTR);
4195 
4196 	return wptr;
4197 }
4198 
4199 void cik_gfx_set_wptr(struct radeon_device *rdev,
4200 		      struct radeon_ring *ring)
4201 {
4202 	WREG32(CP_RB0_WPTR, ring->wptr);
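	/* read back to ensure the wptr write reaches the hardware */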
4203 	(void)RREG32(CP_RB0_WPTR);
4204 }
4205 
4206 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4207 			 struct radeon_ring *ring)
4208 {
4209 	u32 rptr;
4210 
4211 	if (rdev->wb.enabled) {
4212 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4213 	} else {
4214 		mutex_lock(&rdev->srbm_mutex);
4215 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4216 		rptr = RREG32(CP_HQD_PQ_RPTR);
4217 		cik_srbm_select(rdev, 0, 0, 0, 0);
4218 		mutex_unlock(&rdev->srbm_mutex);
4219 	}
4220 
4221 	return rptr;
4222 }
4223 
4224 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4225 			 struct radeon_ring *ring)
4226 {
4227 	u32 wptr;
4228 
4229 	if (rdev->wb.enabled) {
4230 		/* XXX check if swapping is necessary on BE */
4231 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4232 	} else {
4233 		mutex_lock(&rdev->srbm_mutex);
4234 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4235 		wptr = RREG32(CP_HQD_PQ_WPTR);
4236 		cik_srbm_select(rdev, 0, 0, 0, 0);
4237 		mutex_unlock(&rdev->srbm_mutex);
4238 	}
4239 
4240 	return wptr;
4241 }
4242 
4243 void cik_compute_set_wptr(struct radeon_device *rdev,
4244 			  struct radeon_ring *ring)
4245 {
4246 	/* XXX check if swapping is necessary on BE */
4247 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4248 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4249 }
4250 
4251 static void cik_compute_stop(struct radeon_device *rdev,
4252 			     struct radeon_ring *ring)
4253 {
4254 	u32 j, tmp;
4255 
4256 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4257 	/* Disable wptr polling. */
4258 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4259 	tmp &= ~WPTR_POLL_EN;
4260 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4261 	/* Disable HQD. */
4262 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4263 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4264 		for (j = 0; j < rdev->usec_timeout; j++) {
4265 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4266 				break;
4267 			udelay(1);
4268 		}
4269 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4270 		WREG32(CP_HQD_PQ_RPTR, 0);
4271 		WREG32(CP_HQD_PQ_WPTR, 0);
4272 	}
4273 	cik_srbm_select(rdev, 0, 0, 0, 0);
4274 }
4275 
4276 /**
4277  * cik_cp_compute_enable - enable/disable the compute CP MEs
4278  *
4279  * @rdev: radeon_device pointer
4280  * @enable: enable or disable the MEs
4281  *
4282  * Halts or unhalts the compute MEs.
4283  */
4284 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4285 {
4286 	if (enable)
4287 		WREG32(CP_MEC_CNTL, 0);
4288 	else {
4289 		/*
4290 		 * To make hibernation reliable we need to clear compute ring
4291 		 * configuration before halting the compute ring.
4292 		 */
4293 		mutex_lock(&rdev->srbm_mutex);
4294 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4295 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4296 		mutex_unlock(&rdev->srbm_mutex);
4297 
4298 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4299 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4300 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4301 	}
4302 	udelay(50);
4303 }
4304 
4305 /**
4306  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4307  *
4308  * @rdev: radeon_device pointer
4309  *
4310  * Loads the compute MEC1&2 ucode.
4311  * Returns 0 for success, -EINVAL if the ucode is not available.
4312  */
4313 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4314 {
4315 	int i;
4316 
4317 	if (!rdev->mec_fw)
4318 		return -EINVAL;
4319 
4320 	cik_cp_compute_enable(rdev, false);
4321 
4322 	if (rdev->new_fw) {
4323 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4324 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4325 		const __le32 *fw_data;
4326 		u32 fw_size;
4327 
4328 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4329 
4330 		/* MEC1 */
4331 		fw_data = (const __le32 *)
4332 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4333 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4334 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4335 		for (i = 0; i < fw_size; i++)
4336 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4337 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4338 
4339 		/* MEC2 */
4340 		if (rdev->family == CHIP_KAVERI) {
4341 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4342 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4343 
4344 			fw_data = (const __le32 *)
4345 				(rdev->mec2_fw->data +
4346 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4347 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4348 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4349 			for (i = 0; i < fw_size; i++)
4350 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4351 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4352 		}
4353 	} else {
4354 		const __be32 *fw_data;
4355 
4356 		/* MEC1 */
4357 		fw_data = (const __be32 *)rdev->mec_fw->data;
4358 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4359 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4360 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4361 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4362 
4363 		if (rdev->family == CHIP_KAVERI) {
4364 			/* MEC2 */
4365 			fw_data = (const __be32 *)rdev->mec_fw->data;
4366 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4367 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4368 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4369 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4370 		}
4371 	}
4372 
4373 	return 0;
4374 }
4375 
4376 /**
4377  * cik_cp_compute_start - start the compute queues
4378  *
4379  * @rdev: radeon_device pointer
4380  *
4381  * Enable the compute queues.
4382  * Returns 0 for success, error for failure.
4383  */
4384 static int cik_cp_compute_start(struct radeon_device *rdev)
4385 {
4386 	cik_cp_compute_enable(rdev, true);
4387 
4388 	return 0;
4389 }
4390 
4391 /**
4392  * cik_cp_compute_fini - stop the compute queues
4393  *
4394  * @rdev: radeon_device pointer
4395  *
4396  * Stop the compute queues and tear down the driver queue
4397  * info.
4398  */
4399 static void cik_cp_compute_fini(struct radeon_device *rdev)
4400 {
4401 	int i, idx, r;
4402 
4403 	cik_cp_compute_enable(rdev, false);
4404 
4405 	for (i = 0; i < 2; i++) {
4406 		if (i == 0)
4407 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4408 		else
4409 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4410 
4411 		if (rdev->ring[idx].mqd_obj) {
4412 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4413 			if (unlikely(r != 0))
4414 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4415 
4416 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4417 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4418 
4419 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4420 			rdev->ring[idx].mqd_obj = NULL;
4421 		}
4422 	}
4423 }
4424 
4425 static void cik_mec_fini(struct radeon_device *rdev)
4426 {
4427 	int r;
4428 
4429 	if (rdev->mec.hpd_eop_obj) {
4430 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4431 		if (unlikely(r != 0))
4432 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4433 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4434 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4435 
4436 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4437 		rdev->mec.hpd_eop_obj = NULL;
4438 	}
4439 }
4440 
4441 #define MEC_HPD_SIZE 2048
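/*
 * Worked example: with MEC_HPD_SIZE = 2048 bytes (512 dwords), the
 * EOP_SIZE field written in cik_cp_compute_resume() is
 * order_base_2(2048 / 8) = 8, and the encoded size 2^(8+1) = 512 dwords
 * matches the buffer.
 */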
4442 
4443 static int cik_mec_init(struct radeon_device *rdev)
4444 {
4445 	int r;
4446 	u32 *hpd;
4447 
4448 	/*
4449 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4450 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4451 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4452 	 * be handled by KFD
4453 	 */
4454 	rdev->mec.num_mec = 1;
4455 	rdev->mec.num_pipe = 1;
4456 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4457 
4458 	if (rdev->mec.hpd_eop_obj == NULL) {
4459 		r = radeon_bo_create(rdev,
4460 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4461 				     PAGE_SIZE, true,
4462 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4463 				     &rdev->mec.hpd_eop_obj);
4464 		if (r) {
4465 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4466 			return r;
4467 		}
4468 	}
4469 
4470 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4471 	if (unlikely(r != 0)) {
4472 		cik_mec_fini(rdev);
4473 		return r;
4474 	}
4475 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4476 			  &rdev->mec.hpd_eop_gpu_addr);
4477 	if (r) {
4478 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4479 		cik_mec_fini(rdev);
4480 		return r;
4481 	}
4482 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4483 	if (r) {
4484 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4485 		cik_mec_fini(rdev);
4486 		return r;
4487 	}
4488 
4489 	/* clear memory; possibly not strictly required, but safe */
4490 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4491 
4492 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4493 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4494 
4495 	return 0;
4496 }
4497 
4498 struct hqd_registers
4499 {
4500 	u32 cp_mqd_base_addr;
4501 	u32 cp_mqd_base_addr_hi;
4502 	u32 cp_hqd_active;
4503 	u32 cp_hqd_vmid;
4504 	u32 cp_hqd_persistent_state;
4505 	u32 cp_hqd_pipe_priority;
4506 	u32 cp_hqd_queue_priority;
4507 	u32 cp_hqd_quantum;
4508 	u32 cp_hqd_pq_base;
4509 	u32 cp_hqd_pq_base_hi;
4510 	u32 cp_hqd_pq_rptr;
4511 	u32 cp_hqd_pq_rptr_report_addr;
4512 	u32 cp_hqd_pq_rptr_report_addr_hi;
4513 	u32 cp_hqd_pq_wptr_poll_addr;
4514 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4515 	u32 cp_hqd_pq_doorbell_control;
4516 	u32 cp_hqd_pq_wptr;
4517 	u32 cp_hqd_pq_control;
4518 	u32 cp_hqd_ib_base_addr;
4519 	u32 cp_hqd_ib_base_addr_hi;
4520 	u32 cp_hqd_ib_rptr;
4521 	u32 cp_hqd_ib_control;
4522 	u32 cp_hqd_iq_timer;
4523 	u32 cp_hqd_iq_rptr;
4524 	u32 cp_hqd_dequeue_request;
4525 	u32 cp_hqd_dma_offload;
4526 	u32 cp_hqd_sema_cmd;
4527 	u32 cp_hqd_msg_type;
4528 	u32 cp_hqd_atomic0_preop_lo;
4529 	u32 cp_hqd_atomic0_preop_hi;
4530 	u32 cp_hqd_atomic1_preop_lo;
4531 	u32 cp_hqd_atomic1_preop_hi;
4532 	u32 cp_hqd_hq_scheduler0;
4533 	u32 cp_hqd_hq_scheduler1;
4534 	u32 cp_mqd_control;
4535 };
4536 
4537 struct bonaire_mqd
4538 {
4539 	u32 header;
4540 	u32 dispatch_initiator;
4541 	u32 dimensions[3];
4542 	u32 start_idx[3];
4543 	u32 num_threads[3];
4544 	u32 pipeline_stat_enable;
4545 	u32 perf_counter_enable;
4546 	u32 pgm[2];
4547 	u32 tba[2];
4548 	u32 tma[2];
4549 	u32 pgm_rsrc[2];
4550 	u32 vmid;
4551 	u32 resource_limits;
4552 	u32 static_thread_mgmt01[2];
4553 	u32 tmp_ring_size;
4554 	u32 static_thread_mgmt23[2];
4555 	u32 restart[3];
4556 	u32 thread_trace_enable;
4557 	u32 reserved1;
4558 	u32 user_data[16];
4559 	u32 vgtcs_invoke_count[2];
4560 	struct hqd_registers queue_state;
4561 	u32 dequeue_cntr;
4562 	u32 interrupt_queue[64];
4563 };
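/*
 * The queue_state member above mirrors the CP_MQD_* and CP_HQD_*
 * register block: cik_cp_compute_resume() fills in these fields first
 * and then writes each one to the corresponding register, so the
 * in-memory MQD reflects the programmed queue state.
 */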
4564 
4565 /**
4566  * cik_cp_compute_resume - setup the compute queue registers
4567  *
4568  * @rdev: radeon_device pointer
4569  *
4570  * Program the compute queues and test them to make sure they
4571  * are working.
4572  * Returns 0 for success, error for failure.
4573  */
4574 static int cik_cp_compute_resume(struct radeon_device *rdev)
4575 {
4576 	int r, i, j, idx;
4577 	u32 tmp;
4578 	bool use_doorbell = true;
4579 	u64 hqd_gpu_addr;
4580 	u64 mqd_gpu_addr;
4581 	u64 eop_gpu_addr;
4582 	u64 wb_gpu_addr;
4583 	u32 *buf;
4584 	struct bonaire_mqd *mqd;
4585 
4586 	r = cik_cp_compute_start(rdev);
4587 	if (r)
4588 		return r;
4589 
4590 	/* fix up chicken bits */
4591 	tmp = RREG32(CP_CPF_DEBUG);
4592 	tmp |= (1 << 23);
4593 	WREG32(CP_CPF_DEBUG, tmp);
4594 
4595 	/* init the pipes */
4596 	mutex_lock(&rdev->srbm_mutex);
4597 
4598 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4599 
4600 	cik_srbm_select(rdev, 0, 0, 0, 0);
4601 
4602 	/* write the EOP addr */
4603 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4604 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4605 
4606 	/* set the VMID assigned */
4607 	WREG32(CP_HPD_EOP_VMID, 0);
4608 
4609 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4610 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4611 	tmp &= ~EOP_SIZE_MASK;
4612 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4613 	WREG32(CP_HPD_EOP_CONTROL, tmp);
4614 
4615 	mutex_unlock(&rdev->srbm_mutex);
4616 
4617 	/* init the queues.  Just two for now. */
4618 	for (i = 0; i < 2; i++) {
4619 		if (i == 0)
4620 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4621 		else
4622 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4623 
4624 		if (rdev->ring[idx].mqd_obj == NULL) {
4625 			r = radeon_bo_create(rdev,
4626 					     sizeof(struct bonaire_mqd),
4627 					     PAGE_SIZE, true,
4628 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4629 					     NULL, &rdev->ring[idx].mqd_obj);
4630 			if (r) {
4631 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4632 				return r;
4633 			}
4634 		}
4635 
4636 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4637 		if (unlikely(r != 0)) {
4638 			cik_cp_compute_fini(rdev);
4639 			return r;
4640 		}
4641 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4642 				  &mqd_gpu_addr);
4643 		if (r) {
4644 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4645 			cik_cp_compute_fini(rdev);
4646 			return r;
4647 		}
4648 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4649 		if (r) {
4650 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4651 			cik_cp_compute_fini(rdev);
4652 			return r;
4653 		}
4654 
4655 		/* init the mqd struct */
4656 		memset(buf, 0, sizeof(struct bonaire_mqd));
4657 
4658 		mqd = (struct bonaire_mqd *)buf;
4659 		mqd->header = 0xC0310800;
4660 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4661 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4662 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4663 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4664 
4665 		mutex_lock(&rdev->srbm_mutex);
4666 		cik_srbm_select(rdev, rdev->ring[idx].me,
4667 				rdev->ring[idx].pipe,
4668 				rdev->ring[idx].queue, 0);
4669 
4670 		/* disable wptr polling */
4671 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4672 		tmp &= ~WPTR_POLL_EN;
4673 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4674 
4675 		/* enable doorbell? */
4676 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4677 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4678 		if (use_doorbell)
4679 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4680 		else
4681 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4682 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4683 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4684 
4685 		/* disable the queue if it's active */
4686 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4687 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4688 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4689 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4690 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4691 			for (j = 0; j < rdev->usec_timeout; j++) {
4692 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4693 					break;
4694 				udelay(1);
4695 			}
4696 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4697 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4698 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4699 		}
4700 
4701 		/* set the pointer to the MQD */
4702 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4703 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4704 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4705 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4706 		/* set MQD vmid to 0 */
4707 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4708 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4709 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4710 
4711 		/* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
4712 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4713 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4714 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4715 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4716 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4717 
4718 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4719 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4720 		mqd->queue_state.cp_hqd_pq_control &=
4721 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4722 
4723 		mqd->queue_state.cp_hqd_pq_control |=
4724 			order_base_2(rdev->ring[idx].ring_size / 8);
4725 		mqd->queue_state.cp_hqd_pq_control |=
4726 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4727 #ifdef __BIG_ENDIAN
4728 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4729 #endif
4730 		mqd->queue_state.cp_hqd_pq_control &=
4731 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4732 		mqd->queue_state.cp_hqd_pq_control |=
4733 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4734 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4735 
4736 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4737 		if (i == 0)
4738 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4739 		else
4740 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4741 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4742 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4743 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4744 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4745 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4746 
4747 		/* set the wb address whether it's enabled or not */
4748 		if (i == 0)
4749 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4750 		else
4751 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4752 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4753 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4754 			upper_32_bits(wb_gpu_addr) & 0xffff;
4755 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4756 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4757 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4758 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4759 
4760 		/* enable the doorbell if requested */
4761 		if (use_doorbell) {
4762 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4763 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4764 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4765 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4766 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4767 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4768 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4769 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4770 
4771 		} else {
4772 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4773 		}
4774 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4775 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4776 
4777 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4778 		rdev->ring[idx].wptr = 0;
4779 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4780 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4781 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4782 
4783 		/* set the vmid for the queue */
4784 		mqd->queue_state.cp_hqd_vmid = 0;
4785 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4786 
4787 		/* activate the queue */
4788 		mqd->queue_state.cp_hqd_active = 1;
4789 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4790 
4791 		cik_srbm_select(rdev, 0, 0, 0, 0);
4792 		mutex_unlock(&rdev->srbm_mutex);
4793 
4794 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4795 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4796 
4797 		rdev->ring[idx].ready = true;
4798 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4799 		if (r)
4800 			rdev->ring[idx].ready = false;
4801 	}
4802 
4803 	return 0;
4804 }
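
/*
 * Worked example for the CP_HQD_PQ_CONTROL encoding above (example
 * numbers, not from the source): a 1 MiB compute ring gives
 * ring_size / 8 = 131072, so order_base_2() puts 17 in the queue-size
 * field, and with the usual 4 KiB RADEON_GPU_PAGE_SIZE the rptr block
 * size is order_base_2(4096 / 8) = 9, shifted up by 8.
 */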
4805 
4806 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4807 {
4808 	cik_cp_gfx_enable(rdev, enable);
4809 	cik_cp_compute_enable(rdev, enable);
4810 }
4811 
4812 static int cik_cp_load_microcode(struct radeon_device *rdev)
4813 {
4814 	int r;
4815 
4816 	r = cik_cp_gfx_load_microcode(rdev);
4817 	if (r)
4818 		return r;
4819 	r = cik_cp_compute_load_microcode(rdev);
4820 	if (r)
4821 		return r;
4822 
4823 	return 0;
4824 }
4825 
4826 static void cik_cp_fini(struct radeon_device *rdev)
4827 {
4828 	cik_cp_gfx_fini(rdev);
4829 	cik_cp_compute_fini(rdev);
4830 }
4831 
4832 static int cik_cp_resume(struct radeon_device *rdev)
4833 {
4834 	int r;
4835 
4836 	cik_enable_gui_idle_interrupt(rdev, false);
4837 
4838 	r = cik_cp_load_microcode(rdev);
4839 	if (r)
4840 		return r;
4841 
4842 	r = cik_cp_gfx_resume(rdev);
4843 	if (r)
4844 		return r;
4845 	r = cik_cp_compute_resume(rdev);
4846 	if (r)
4847 		return r;
4848 
4849 	cik_enable_gui_idle_interrupt(rdev, true);
4850 
4851 	return 0;
4852 }
4853 
4854 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4855 {
4856 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4857 		RREG32(GRBM_STATUS));
4858 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4859 		RREG32(GRBM_STATUS2));
4860 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4861 		RREG32(GRBM_STATUS_SE0));
4862 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4863 		RREG32(GRBM_STATUS_SE1));
4864 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4865 		RREG32(GRBM_STATUS_SE2));
4866 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4867 		RREG32(GRBM_STATUS_SE3));
4868 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4869 		RREG32(SRBM_STATUS));
4870 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4871 		RREG32(SRBM_STATUS2));
4872 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4873 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4874 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4875 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4876 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4877 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4878 		 RREG32(CP_STALLED_STAT1));
4879 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4880 		 RREG32(CP_STALLED_STAT2));
4881 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4882 		 RREG32(CP_STALLED_STAT3));
4883 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4884 		 RREG32(CP_CPF_BUSY_STAT));
4885 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4886 		 RREG32(CP_CPF_STALLED_STAT1));
4887 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4888 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4889 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4890 		 RREG32(CP_CPC_STALLED_STAT1));
4891 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4892 }
4893 
4894 /**
4895  * cik_gpu_check_soft_reset - check which blocks are busy
4896  *
4897  * @rdev: radeon_device pointer
4898  *
4899  * Check which blocks are busy and return the relevant reset
4900  * mask to be used by cik_gpu_soft_reset().
4901  * Returns a mask of the blocks to be reset.
4902  */
4903 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4904 {
4905 	u32 reset_mask = 0;
4906 	u32 tmp;
4907 
4908 	/* GRBM_STATUS */
4909 	tmp = RREG32(GRBM_STATUS);
4910 	if (tmp & (PA_BUSY | SC_BUSY |
4911 		   BCI_BUSY | SX_BUSY |
4912 		   TA_BUSY | VGT_BUSY |
4913 		   DB_BUSY | CB_BUSY |
4914 		   GDS_BUSY | SPI_BUSY |
4915 		   IA_BUSY | IA_BUSY_NO_DMA))
4916 		reset_mask |= RADEON_RESET_GFX;
4917 
4918 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4919 		reset_mask |= RADEON_RESET_CP;
4920 
4921 	/* GRBM_STATUS2 */
4922 	tmp = RREG32(GRBM_STATUS2);
4923 	if (tmp & RLC_BUSY)
4924 		reset_mask |= RADEON_RESET_RLC;
4925 
4926 	/* SDMA0_STATUS_REG */
4927 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4928 	if (!(tmp & SDMA_IDLE))
4929 		reset_mask |= RADEON_RESET_DMA;
4930 
4931 	/* SDMA1_STATUS_REG */
4932 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4933 	if (!(tmp & SDMA_IDLE))
4934 		reset_mask |= RADEON_RESET_DMA1;
4935 
4936 	/* SRBM_STATUS2 */
4937 	tmp = RREG32(SRBM_STATUS2);
4938 	if (tmp & SDMA_BUSY)
4939 		reset_mask |= RADEON_RESET_DMA;
4940 
4941 	if (tmp & SDMA1_BUSY)
4942 		reset_mask |= RADEON_RESET_DMA1;
4943 
4944 	/* SRBM_STATUS */
4945 	tmp = RREG32(SRBM_STATUS);
4946 
4947 	if (tmp & IH_BUSY)
4948 		reset_mask |= RADEON_RESET_IH;
4949 
4950 	if (tmp & SEM_BUSY)
4951 		reset_mask |= RADEON_RESET_SEM;
4952 
4953 	if (tmp & GRBM_RQ_PENDING)
4954 		reset_mask |= RADEON_RESET_GRBM;
4955 
4956 	if (tmp & VMC_BUSY)
4957 		reset_mask |= RADEON_RESET_VMC;
4958 
4959 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4960 		   MCC_BUSY | MCD_BUSY))
4961 		reset_mask |= RADEON_RESET_MC;
4962 
4963 	if (evergreen_is_display_hung(rdev))
4964 		reset_mask |= RADEON_RESET_DISPLAY;
4965 
4966 	/* Skip MC reset as it's most likely not hung, just busy */
4967 	if (reset_mask & RADEON_RESET_MC) {
4968 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4969 		reset_mask &= ~RADEON_RESET_MC;
4970 	}
4971 
4972 	return reset_mask;
4973 }
4974 
4975 /**
4976  * cik_gpu_soft_reset - soft reset GPU
4977  *
4978  * @rdev: radeon_device pointer
4979  * @reset_mask: mask of which blocks to reset
4980  *
4981  * Soft reset the blocks specified in @reset_mask.
4982  */
4983 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4984 {
4985 	struct evergreen_mc_save save;
4986 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4987 	u32 tmp;
4988 
4989 	if (reset_mask == 0)
4990 		return;
4991 
4992 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4993 
4994 	cik_print_gpu_status_regs(rdev);
4995 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4996 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4997 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4998 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4999 
5000 	/* disable CG/PG */
5001 	cik_fini_pg(rdev);
5002 	cik_fini_cg(rdev);
5003 
5004 	/* stop the rlc */
5005 	cik_rlc_stop(rdev);
5006 
5007 	/* Disable GFX parsing/prefetching */
5008 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5009 
5010 	/* Disable MEC parsing/prefetching */
5011 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5012 
5013 	if (reset_mask & RADEON_RESET_DMA) {
5014 		/* sdma0 */
5015 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5016 		tmp |= SDMA_HALT;
5017 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5018 	}
5019 	if (reset_mask & RADEON_RESET_DMA1) {
5020 		/* sdma1 */
5021 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5022 		tmp |= SDMA_HALT;
5023 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5024 	}
5025 
5026 	evergreen_mc_stop(rdev, &save);
5027 	if (evergreen_mc_wait_for_idle(rdev)) {
5028 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5029 	}
5030 
5031 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5032 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5033 
5034 	if (reset_mask & RADEON_RESET_CP) {
5035 		grbm_soft_reset |= SOFT_RESET_CP;
5036 
5037 		srbm_soft_reset |= SOFT_RESET_GRBM;
5038 	}
5039 
5040 	if (reset_mask & RADEON_RESET_DMA)
5041 		srbm_soft_reset |= SOFT_RESET_SDMA;
5042 
5043 	if (reset_mask & RADEON_RESET_DMA1)
5044 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5045 
5046 	if (reset_mask & RADEON_RESET_DISPLAY)
5047 		srbm_soft_reset |= SOFT_RESET_DC;
5048 
5049 	if (reset_mask & RADEON_RESET_RLC)
5050 		grbm_soft_reset |= SOFT_RESET_RLC;
5051 
5052 	if (reset_mask & RADEON_RESET_SEM)
5053 		srbm_soft_reset |= SOFT_RESET_SEM;
5054 
5055 	if (reset_mask & RADEON_RESET_IH)
5056 		srbm_soft_reset |= SOFT_RESET_IH;
5057 
5058 	if (reset_mask & RADEON_RESET_GRBM)
5059 		srbm_soft_reset |= SOFT_RESET_GRBM;
5060 
5061 	if (reset_mask & RADEON_RESET_VMC)
5062 		srbm_soft_reset |= SOFT_RESET_VMC;
5063 
5064 	if (!(rdev->flags & RADEON_IS_IGP)) {
5065 		if (reset_mask & RADEON_RESET_MC)
5066 			srbm_soft_reset |= SOFT_RESET_MC;
5067 	}
5068 
5069 	if (grbm_soft_reset) {
5070 		tmp = RREG32(GRBM_SOFT_RESET);
5071 		tmp |= grbm_soft_reset;
5072 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5073 		WREG32(GRBM_SOFT_RESET, tmp);
5074 		tmp = RREG32(GRBM_SOFT_RESET);
5075 
5076 		udelay(50);
5077 
5078 		tmp &= ~grbm_soft_reset;
5079 		WREG32(GRBM_SOFT_RESET, tmp);
5080 		tmp = RREG32(GRBM_SOFT_RESET);
5081 	}
5082 
5083 	if (srbm_soft_reset) {
5084 		tmp = RREG32(SRBM_SOFT_RESET);
5085 		tmp |= srbm_soft_reset;
5086 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5087 		WREG32(SRBM_SOFT_RESET, tmp);
5088 		tmp = RREG32(SRBM_SOFT_RESET);
5089 
5090 		udelay(50);
5091 
5092 		tmp &= ~srbm_soft_reset;
5093 		WREG32(SRBM_SOFT_RESET, tmp);
5094 		tmp = RREG32(SRBM_SOFT_RESET);
5095 	}
5096 
5097 	/* Wait a little for things to settle down */
5098 	udelay(50);
5099 
5100 	evergreen_mc_resume(rdev, &save);
5101 	udelay(50);
5102 
5103 	cik_print_gpu_status_regs(rdev);
5104 }
5105 
5106 struct kv_reset_save_regs {
5107 	u32 gmcon_reng_execute;
5108 	u32 gmcon_misc;
5109 	u32 gmcon_misc3;
5110 };
5111 
5112 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5113 				   struct kv_reset_save_regs *save)
5114 {
5115 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5116 	save->gmcon_misc = RREG32(GMCON_MISC);
5117 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5118 
5119 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5120 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5121 						STCTRL_STUTTER_EN));
5122 }
5123 
5124 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5125 				      struct kv_reset_save_regs *save)
5126 {
5127 	int i;
5128 
5129 	WREG32(GMCON_PGFSM_WRITE, 0);
5130 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5131 
5132 	for (i = 0; i < 5; i++)
5133 		WREG32(GMCON_PGFSM_WRITE, 0);
5134 
5135 	WREG32(GMCON_PGFSM_WRITE, 0);
5136 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5137 
5138 	for (i = 0; i < 5; i++)
5139 		WREG32(GMCON_PGFSM_WRITE, 0);
5140 
5141 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5142 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5143 
5144 	for (i = 0; i < 5; i++)
5145 		WREG32(GMCON_PGFSM_WRITE, 0);
5146 
5147 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5148 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5149 
5150 	for (i = 0; i < 5; i++)
5151 		WREG32(GMCON_PGFSM_WRITE, 0);
5152 
5153 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5154 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5155 
5156 	for (i = 0; i < 5; i++)
5157 		WREG32(GMCON_PGFSM_WRITE, 0);
5158 
5159 	WREG32(GMCON_PGFSM_WRITE, 0);
5160 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5161 
5162 	for (i = 0; i < 5; i++)
5163 		WREG32(GMCON_PGFSM_WRITE, 0);
5164 
5165 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5166 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5167 
5168 	for (i = 0; i < 5; i++)
5169 		WREG32(GMCON_PGFSM_WRITE, 0);
5170 
5171 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5172 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5173 
5174 	for (i = 0; i < 5; i++)
5175 		WREG32(GMCON_PGFSM_WRITE, 0);
5176 
5177 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5178 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5179 
5180 	for (i = 0; i < 5; i++)
5181 		WREG32(GMCON_PGFSM_WRITE, 0);
5182 
5183 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5184 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5185 
5186 	for (i = 0; i < 5; i++)
5187 		WREG32(GMCON_PGFSM_WRITE, 0);
5188 
5189 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5190 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5191 
5192 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5193 	WREG32(GMCON_MISC, save->gmcon_misc);
5194 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5195 }
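
/*
 * kv_restore_regs_for_reset() repeats one pattern for each intermediate
 * step: program a PGFSM value, select the target config, then issue five
 * dummy writes to step the state machine. A minimal sketch of a helper
 * that would factor this out; kv_pgfsm_write() is hypothetical and not
 * part of this driver:
 */
static void kv_pgfsm_write(struct radeon_device *rdev, u32 value, u32 config)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, value);	/* stage the value */
	WREG32(GMCON_PGFSM_CONFIG, config);	/* pick the PGFSM target */
	for (i = 0; i < 5; i++)			/* clock the state machine */
		WREG32(GMCON_PGFSM_WRITE, 0);
}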
5196 
5197 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5198 {
5199 	struct evergreen_mc_save save;
5200 	struct kv_reset_save_regs kv_save = { 0 };
5201 	u32 tmp, i;
5202 
5203 	dev_info(rdev->dev, "GPU pci config reset\n");
5204 
5205 	/* disable dpm? */
5206 
5207 	/* disable cg/pg */
5208 	cik_fini_pg(rdev);
5209 	cik_fini_cg(rdev);
5210 
5211 	/* Disable GFX parsing/prefetching */
5212 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5213 
5214 	/* Disable MEC parsing/prefetching */
5215 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5216 
5217 	/* sdma0 */
5218 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5219 	tmp |= SDMA_HALT;
5220 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5221 	/* sdma1 */
5222 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5223 	tmp |= SDMA_HALT;
5224 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5225 	/* XXX other engines? */
5226 
5227 	/* halt the rlc, disable cp internal ints */
5228 	cik_rlc_stop(rdev);
5229 
5230 	udelay(50);
5231 
5232 	/* disable mem access */
5233 	evergreen_mc_stop(rdev, &save);
5234 	if (evergreen_mc_wait_for_idle(rdev)) {
5235 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5236 	}
5237 
5238 	if (rdev->flags & RADEON_IS_IGP)
5239 		kv_save_regs_for_reset(rdev, &kv_save);
5240 
5241 	/* disable BM */
5242 	pci_clear_master(rdev->pdev);
5243 	/* reset */
5244 	radeon_pci_config_reset(rdev);
5245 
5246 	udelay(100);
5247 
5248 	/* wait for asic to come out of reset */
5249 	for (i = 0; i < rdev->usec_timeout; i++) {
5250 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5251 			break;
5252 		udelay(1);
5253 	}
5254 
5255 	/* does asic init need to be run first??? */
5256 	if (rdev->flags & RADEON_IS_IGP)
5257 		kv_restore_regs_for_reset(rdev, &kv_save);
5258 }
5259 
5260 /**
5261  * cik_asic_reset - soft reset GPU
5262  *
5263  * @rdev: radeon_device pointer
5264  *
5265  * Look up which blocks are hung and attempt
5266  * to reset them.
5267  * Returns 0 for success.
5268  */
5269 int cik_asic_reset(struct radeon_device *rdev)
5270 {
5271 	u32 reset_mask;
5272 
5273 	reset_mask = cik_gpu_check_soft_reset(rdev);
5274 
5275 	if (reset_mask)
5276 		r600_set_bios_scratch_engine_hung(rdev, true);
5277 
5278 	/* try soft reset */
5279 	cik_gpu_soft_reset(rdev, reset_mask);
5280 
5281 	reset_mask = cik_gpu_check_soft_reset(rdev);
5282 
5283 	/* try pci config reset */
5284 	if (reset_mask && radeon_hard_reset)
5285 		cik_gpu_pci_config_reset(rdev);
5286 
5287 	reset_mask = cik_gpu_check_soft_reset(rdev);
5288 
5289 	if (!reset_mask)
5290 		r600_set_bios_scratch_engine_hung(rdev, false);
5291 
5292 	return 0;
5293 }
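
/*
 * Note the escalation order above: a targeted soft reset is tried first,
 * and only if cik_gpu_check_soft_reset() still reports busy blocks, and
 * the radeon_hard_reset module parameter is set, does the driver fall
 * back to the heavier PCI config reset. The BIOS scratch "engine hung"
 * bit brackets the whole sequence.
 */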
5294 
5295 /**
5296  * cik_gfx_is_lockup - check if the 3D engine is locked up
5297  *
5298  * @rdev: radeon_device pointer
5299  * @ring: radeon_ring structure holding ring information
5300  *
5301  * Check if the 3D engine is locked up (CIK).
5302  * Returns true if the engine is locked, false if not.
5303  */
5304 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5305 {
5306 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5307 
5308 	if (!(reset_mask & (RADEON_RESET_GFX |
5309 			    RADEON_RESET_COMPUTE |
5310 			    RADEON_RESET_CP))) {
5311 		radeon_ring_lockup_update(rdev, ring);
5312 		return false;
5313 	}
5314 	return radeon_ring_test_lockup(rdev, ring);
5315 }
5316 
5317 /* MC */
5318 /**
5319  * cik_mc_program - program the GPU memory controller
5320  *
5321  * @rdev: radeon_device pointer
5322  *
5323  * Set the location of vram, gart, and AGP in the GPU's
5324  * physical address space (CIK).
5325  */
5326 static void cik_mc_program(struct radeon_device *rdev)
5327 {
5328 	struct evergreen_mc_save save;
5329 	u32 tmp;
5330 	int i, j;
5331 
5332 	/* Initialize HDP */
5333 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5334 		WREG32((0x2c14 + j), 0x00000000);
5335 		WREG32((0x2c18 + j), 0x00000000);
5336 		WREG32((0x2c1c + j), 0x00000000);
5337 		WREG32((0x2c20 + j), 0x00000000);
5338 		WREG32((0x2c24 + j), 0x00000000);
5339 	}
5340 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5341 
5342 	evergreen_mc_stop(rdev, &save);
5343 	if (radeon_mc_wait_for_idle(rdev)) {
5344 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5345 	}
5346 	/* Lockout access through VGA aperture*/
5347 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5348 	/* Update configuration */
5349 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5350 	       rdev->mc.vram_start >> 12);
5351 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5352 	       rdev->mc.vram_end >> 12);
5353 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5354 	       rdev->vram_scratch.gpu_addr >> 12);
5355 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5356 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5357 	WREG32(MC_VM_FB_LOCATION, tmp);
5358 	/* XXX double check these! */
5359 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5360 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5361 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5362 	WREG32(MC_VM_AGP_BASE, 0);
5363 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5364 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5365 	if (radeon_mc_wait_for_idle(rdev)) {
5366 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5367 	}
5368 	evergreen_mc_resume(rdev, &save);
5369 	/* we need to own VRAM, so turn off the VGA renderer here
5370 	 * to stop it overwriting our objects */
5371 	rv515_vga_render_disable(rdev);
5372 }
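
/*
 * MC_VM_FB_LOCATION packs the FB range in 16 MiB (1 << 24) units:
 * bits 15:0 hold vram_start >> 24 and bits 31:16 hold vram_end >> 24.
 * With example numbers, 1 GiB of VRAM at offset 0 gives
 * vram_end = 0x3fffffff and tmp = (0x3f << 16) | 0x00 = 0x003f0000.
 */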
5373 
5374 /**
5375  * cik_mc_init - initialize the memory controller driver params
5376  *
5377  * @rdev: radeon_device pointer
5378  *
5379  * Look up the amount of vram, vram width, and decide how to place
5380  * vram and gart within the GPU's physical address space (CIK).
5381  * Returns 0 for success.
5382  */
5383 static int cik_mc_init(struct radeon_device *rdev)
5384 {
5385 	u32 tmp;
5386 	int chansize, numchan;
5387 
5388 	/* Get VRAM information */
5389 	rdev->mc.vram_is_ddr = true;
5390 	tmp = RREG32(MC_ARB_RAMCFG);
5391 	if (tmp & CHANSIZE_MASK) {
5392 		chansize = 64;
5393 	} else {
5394 		chansize = 32;
5395 	}
5396 	tmp = RREG32(MC_SHARED_CHMAP);
5397 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5398 	case 0:
5399 	default:
5400 		numchan = 1;
5401 		break;
5402 	case 1:
5403 		numchan = 2;
5404 		break;
5405 	case 2:
5406 		numchan = 4;
5407 		break;
5408 	case 3:
5409 		numchan = 8;
5410 		break;
5411 	case 4:
5412 		numchan = 3;
5413 		break;
5414 	case 5:
5415 		numchan = 6;
5416 		break;
5417 	case 6:
5418 		numchan = 10;
5419 		break;
5420 	case 7:
5421 		numchan = 12;
5422 		break;
5423 	case 8:
5424 		numchan = 16;
5425 		break;
5426 	}
5427 	rdev->mc.vram_width = numchan * chansize;
5428 	/* Could aper size report 0? */
5429 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5430 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5431 	/* size in MB, same scheme as on SI */
5432 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5433 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5434 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5435 	si_vram_gtt_location(rdev, &rdev->mc);
5436 	radeon_update_bandwidth_info(rdev);
5437 
5438 	return 0;
5439 }
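
/*
 * The bus width computed above is just channels * channel size, e.g.
 * a NOOFCHAN field of 3 (8 channels) with CHANSIZE set (64-bit
 * channels) yields vram_width = 8 * 64 = 512 bits.
 */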
5440 
5441 /*
5442  * GART
5443  * VMID 0 is the physical GPU addresses as used by the kernel.
5444  * VMIDs 1-15 are used for userspace clients and are handled
5445  * by the radeon vm/hsa code.
5446  */
5447 /**
5448  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5449  *
5450  * @rdev: radeon_device pointer
5451  *
5452  * Flush the TLB for the VMID 0 page table (CIK).
5453  */
5454 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5455 {
5456 	/* flush hdp cache */
5457 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5458 
5459 	/* bits 0-15 are the VM contexts 0-15 */
5460 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5461 }
5462 
5463 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5464 {
5465 	int i;
5466 	uint32_t sh_mem_bases, sh_mem_config;
5467 
5468 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5469 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5470 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5471 
5472 	mutex_lock(&rdev->srbm_mutex);
5473 	for (i = 8; i < 16; i++) {
5474 		cik_srbm_select(rdev, 0, 0, 0, i);
5475 		/* CP and shaders */
5476 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5477 		WREG32(SH_MEM_APE1_BASE, 1);
5478 		WREG32(SH_MEM_APE1_LIMIT, 0);
5479 		WREG32(SH_MEM_BASES, sh_mem_bases);
5480 	}
5481 	cik_srbm_select(rdev, 0, 0, 0, 0);
5482 	mutex_unlock(&rdev->srbm_mutex);
5483 }
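
/*
 * SH_MEM_BASES packs the private aperture base in its low 16 bits and
 * the shared aperture base in its high 16 bits, so 0x6000 | 0x6000 << 16
 * places both apertures at the same base for the compute VMIDs (8-15)
 * that amdkfd owns. APE1 is parked by programming its base (1) above
 * its limit (0), which effectively disables that aperture.
 */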
5484 
5485 /**
5486  * cik_pcie_gart_enable - gart enable
5487  *
5488  * @rdev: radeon_device pointer
5489  *
5490  * This sets up the TLBs, programs the page tables for VMID0,
5491  * sets up the hw for VMIDs 1-15 which are allocated on
5492  * demand, and sets up the global locations for the LDS, GDS,
5493  * and GPUVM for FSA64 clients (CIK).
5494  * Returns 0 for success, errors for failure.
5495  */
5496 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5497 {
5498 	int r, i;
5499 
5500 	if (rdev->gart.robj == NULL) {
5501 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5502 		return -EINVAL;
5503 	}
5504 	r = radeon_gart_table_vram_pin(rdev);
5505 	if (r)
5506 		return r;
5507 	/* Setup TLB control */
5508 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5509 	       (0xA << 7) |
5510 	       ENABLE_L1_TLB |
5511 	       ENABLE_L1_FRAGMENT_PROCESSING |
5512 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5513 	       ENABLE_ADVANCED_DRIVER_MODEL |
5514 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5515 	/* Setup L2 cache */
5516 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5517 	       ENABLE_L2_FRAGMENT_PROCESSING |
5518 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5519 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5520 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5521 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5522 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5523 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5524 	       BANK_SELECT(4) |
5525 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5526 	/* setup context0 */
5527 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5528 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5529 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5530 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5531 			(u32)(rdev->dummy_page.addr >> 12));
5532 	WREG32(VM_CONTEXT0_CNTL2, 0);
5533 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5534 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5535 
5536 	WREG32(0x15D4, 0);
5537 	WREG32(0x15D8, 0);
5538 	WREG32(0x15DC, 0);
5539 
5540 	/* restore contexts 1-15 */
5541 	/* set vm size, must be a multiple of 4 */
5542 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5543 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5544 	for (i = 1; i < 16; i++) {
5545 		if (i < 8)
5546 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5547 			       rdev->vm_manager.saved_table_addr[i]);
5548 		else
5549 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5550 			       rdev->vm_manager.saved_table_addr[i]);
5551 	}
5552 
5553 	/* enable contexts 1-15 */
5554 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5555 	       (u32)(rdev->dummy_page.addr >> 12));
5556 	WREG32(VM_CONTEXT1_CNTL2, 4);
5557 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5558 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5559 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5560 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5561 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5562 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5563 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5564 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5565 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5566 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5567 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5568 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5569 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5570 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5571 
5572 	if (rdev->family == CHIP_KAVERI) {
5573 		u32 tmp = RREG32(CHUB_CONTROL);
5574 		tmp &= ~BYPASS_VM;
5575 		WREG32(CHUB_CONTROL, tmp);
5576 	}
5577 
5578 	/* XXX SH_MEM regs */
5579 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5580 	mutex_lock(&rdev->srbm_mutex);
5581 	for (i = 0; i < 16; i++) {
5582 		cik_srbm_select(rdev, 0, 0, 0, i);
5583 		/* CP and shaders */
5584 		WREG32(SH_MEM_CONFIG, 0);
5585 		WREG32(SH_MEM_APE1_BASE, 1);
5586 		WREG32(SH_MEM_APE1_LIMIT, 0);
5587 		WREG32(SH_MEM_BASES, 0);
5588 		/* SDMA GFX */
5589 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5590 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5591 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5592 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5593 		/* XXX SDMA RLC - todo */
5594 	}
5595 	cik_srbm_select(rdev, 0, 0, 0, 0);
5596 	mutex_unlock(&rdev->srbm_mutex);
5597 
5598 	cik_pcie_init_compute_vmid(rdev);
5599 
5600 	cik_pcie_gart_tlb_flush(rdev);
5601 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5602 		 (unsigned)(rdev->mc.gtt_size >> 20),
5603 		 (unsigned long long)rdev->gart.table_addr);
5604 	rdev->gart.ready = true;
5605 	return 0;
5606 }
5607 
5608 /**
5609  * cik_pcie_gart_disable - gart disable
5610  *
5611  * @rdev: radeon_device pointer
5612  *
5613  * This disables all VM page tables (CIK).
5614  */
5615 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5616 {
5617 	unsigned i;
5618 
5619 	for (i = 1; i < 16; ++i) {
5620 		uint32_t reg;
5621 		if (i < 8)
5622 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5623 		else
5624 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5625 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5626 	}
5627 
5628 	/* Disable all tables */
5629 	WREG32(VM_CONTEXT0_CNTL, 0);
5630 	WREG32(VM_CONTEXT1_CNTL, 0);
5631 	/* Setup TLB control */
5632 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5633 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5634 	/* Setup L2 cache */
5635 	WREG32(VM_L2_CNTL,
5636 	       ENABLE_L2_FRAGMENT_PROCESSING |
5637 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5638 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5639 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5640 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5641 	WREG32(VM_L2_CNTL2, 0);
5642 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5643 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5644 	radeon_gart_table_vram_unpin(rdev);
5645 }
5646 
5647 /**
5648  * cik_pcie_gart_fini - vm fini callback
5649  *
5650  * @rdev: radeon_device pointer
5651  *
5652  * Tears down the driver GART/VM setup (CIK).
5653  */
5654 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5655 {
5656 	cik_pcie_gart_disable(rdev);
5657 	radeon_gart_table_vram_free(rdev);
5658 	radeon_gart_fini(rdev);
5659 }
5660 
5661 /* vm parser */
5662 /**
5663  * cik_ib_parse - vm ib_parse callback
5664  *
5665  * @rdev: radeon_device pointer
5666  * @ib: indirect buffer pointer
5667  *
5668  * CIK uses hw IB checking so this is a nop (CIK).
5669  */
5670 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5671 {
5672 	return 0;
5673 }
5674 
5675 /*
5676  * vm
5677  * VMID 0 is the physical GPU addresses as used by the kernel.
5678  * VMIDs 1-15 are used for userspace clients and are handled
5679  * by the radeon vm/hsa code.
5680  */
5681 /**
5682  * cik_vm_init - cik vm init callback
5683  *
5684  * @rdev: radeon_device pointer
5685  *
5686  * Inits cik specific vm parameters (number of VMs, base of vram for
5687  * VMIDs 1-15) (CIK).
5688  * Returns 0 for success.
5689  */
5690 int cik_vm_init(struct radeon_device *rdev)
5691 {
5692 	/*
5693 	 * number of VMs
5694 	 * VMID 0 is reserved for System
5695 	 * radeon graphics/compute will use VMIDs 1-7
5696 	 * amdkfd will use VMIDs 8-15
5697 	 */
5698 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5699 	/* base offset of vram pages */
5700 	if (rdev->flags & RADEON_IS_IGP) {
5701 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5702 		tmp <<= 22;
5703 		rdev->vm_manager.vram_base_offset = tmp;
5704 	} else
5705 		rdev->vm_manager.vram_base_offset = 0;
5706 
5707 	return 0;
5708 }
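
/*
 * MC_VM_FB_OFFSET counts in 4 MiB (1 << 22) units, hence the shift
 * above; with an example reading of 0x40 the vram base offset would be
 * 0x40 << 22 = 256 MiB.
 */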
5709 
5710 /**
5711  * cik_vm_fini - cik vm fini callback
5712  *
5713  * @rdev: radeon_device pointer
5714  *
5715  * Tear down any asic specific VM setup (CIK).
5716  */
5717 void cik_vm_fini(struct radeon_device *rdev)
5718 {
5719 }
5720 
5721 /**
5722  * cik_vm_decode_fault - print human readable fault info
5723  *
5724  * @rdev: radeon_device pointer
5725  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5726  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5727  *
5728  * Print human readable fault information (CIK).
5729  */
5730 static void cik_vm_decode_fault(struct radeon_device *rdev,
5731 				u32 status, u32 addr, u32 mc_client)
5732 {
5733 	u32 mc_id;
5734 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5735 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5736 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5737 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5738 
5739 	if (rdev->family == CHIP_HAWAII)
5740 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5741 	else
5742 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5743 
5744 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5745 	       protections, vmid, addr,
5746 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5747 	       block, mc_client, mc_id);
5748 }
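
/*
 * mc_client is a four-character ASCII tag packed big-endian, which is
 * what the block[] initializer above unpacks; an example value of
 * 0x43504700 would print as "CPG".
 */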
5749 
5750 /**
5751  * cik_vm_flush - cik vm flush using the CP
5752  *
5753  * @rdev: radeon_device pointer
5754  *
5755  * Update the page table base and flush the VM TLB
5756  * using the CP (CIK).
5757  */
5758 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5759 		  unsigned vm_id, uint64_t pd_addr)
5760 {
5761 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5762 
5763 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5764 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5765 				 WRITE_DATA_DST_SEL(0)));
5766 	if (vm_id < 8) {
5767 		radeon_ring_write(ring,
5768 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5769 	} else {
5770 		radeon_ring_write(ring,
5771 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5772 	}
5773 	radeon_ring_write(ring, 0);
5774 	radeon_ring_write(ring, pd_addr >> 12);
5775 
5776 	/* update SH_MEM_* regs */
5777 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5778 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5779 				 WRITE_DATA_DST_SEL(0)));
5780 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5781 	radeon_ring_write(ring, 0);
5782 	radeon_ring_write(ring, VMID(vm_id));
5783 
5784 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5785 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5786 				 WRITE_DATA_DST_SEL(0)));
5787 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5788 	radeon_ring_write(ring, 0);
5789 
5790 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5791 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5792 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5793 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5794 
5795 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5796 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5797 				 WRITE_DATA_DST_SEL(0)));
5798 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5799 	radeon_ring_write(ring, 0);
5800 	radeon_ring_write(ring, VMID(0));
5801 
5802 	/* HDP flush */
5803 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5804 
5805 	/* bits 0-15 are the VM contexts 0-15 */
5806 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5807 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5808 				 WRITE_DATA_DST_SEL(0)));
5809 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5810 	radeon_ring_write(ring, 0);
5811 	radeon_ring_write(ring, 1 << vm_id);
5812 
5813 	/* wait for the invalidate to complete */
5814 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5815 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5816 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5817 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5818 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5819 	radeon_ring_write(ring, 0);
5820 	radeon_ring_write(ring, 0); /* ref */
5821 	radeon_ring_write(ring, 0); /* mask */
5822 	radeon_ring_write(ring, 0x20); /* poll interval */
5823 
5824 	/* compute doesn't have PFP */
5825 	if (usepfp) {
5826 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5827 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5828 		radeon_ring_write(ring, 0x0);
5829 	}
5830 }
5831 
5832 /*
5833  * RLC
5834  * The RLC is a multi-purpose microengine that handles a
5835  * variety of functions, the most important of which is
5836  * the interrupt controller.
5837  */
5838 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5839 					  bool enable)
5840 {
5841 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5842 
5843 	if (enable)
5844 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5845 	else
5846 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5847 	WREG32(CP_INT_CNTL_RING0, tmp);
5848 }
5849 
5850 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5851 {
5852 	u32 tmp;
5853 
5854 	tmp = RREG32(RLC_LB_CNTL);
5855 	if (enable)
5856 		tmp |= LOAD_BALANCE_ENABLE;
5857 	else
5858 		tmp &= ~LOAD_BALANCE_ENABLE;
5859 	WREG32(RLC_LB_CNTL, tmp);
5860 }
5861 
5862 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5863 {
5864 	u32 i, j, k;
5865 	u32 mask;
5866 
5867 	mutex_lock(&rdev->grbm_idx_mutex);
5868 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5869 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5870 			cik_select_se_sh(rdev, i, j);
5871 			for (k = 0; k < rdev->usec_timeout; k++) {
5872 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5873 					break;
5874 				udelay(1);
5875 			}
5876 		}
5877 	}
5878 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5879 	mutex_unlock(&rdev->grbm_idx_mutex);
5880 
5881 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5882 	for (k = 0; k < rdev->usec_timeout; k++) {
5883 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5884 			break;
5885 		udelay(1);
5886 	}
5887 }
5888 
5889 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5890 {
5891 	u32 tmp;
5892 
5893 	tmp = RREG32(RLC_CNTL);
5894 	if (tmp != rlc)
5895 		WREG32(RLC_CNTL, rlc);
5896 }
5897 
5898 static u32 cik_halt_rlc(struct radeon_device *rdev)
5899 {
5900 	u32 data, orig;
5901 
5902 	orig = data = RREG32(RLC_CNTL);
5903 
5904 	if (data & RLC_ENABLE) {
5905 		u32 i;
5906 
5907 		data &= ~RLC_ENABLE;
5908 		WREG32(RLC_CNTL, data);
5909 
5910 		for (i = 0; i < rdev->usec_timeout; i++) {
5911 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5912 				break;
5913 			udelay(1);
5914 		}
5915 
5916 		cik_wait_for_rlc_serdes(rdev);
5917 	}
5918 
5919 	return orig;
5920 }
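
/*
 * cik_halt_rlc() returns the original RLC_CNTL value so that callers
 * (see cik_enable_cgcg()/cik_enable_mgcg() below) can program the RLC
 * serdes while the engine is quiesced and then restore the saved state
 * with cik_update_rlc().
 */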
5921 
5922 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5923 {
5924 	u32 tmp, i, mask;
5925 
5926 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5927 	WREG32(RLC_GPR_REG2, tmp);
5928 
5929 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5930 	for (i = 0; i < rdev->usec_timeout; i++) {
5931 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5932 			break;
5933 		udelay(1);
5934 	}
5935 
5936 	for (i = 0; i < rdev->usec_timeout; i++) {
5937 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5938 			break;
5939 		udelay(1);
5940 	}
5941 }
5942 
5943 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5944 {
5945 	u32 tmp;
5946 
5947 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5948 	WREG32(RLC_GPR_REG2, tmp);
5949 }
5950 
5951 /**
5952  * cik_rlc_stop - stop the RLC ME
5953  *
5954  * @rdev: radeon_device pointer
5955  *
5956  * Halt the RLC ME (MicroEngine) (CIK).
5957  */
5958 static void cik_rlc_stop(struct radeon_device *rdev)
5959 {
5960 	WREG32(RLC_CNTL, 0);
5961 
5962 	cik_enable_gui_idle_interrupt(rdev, false);
5963 
5964 	cik_wait_for_rlc_serdes(rdev);
5965 }
5966 
5967 /**
5968  * cik_rlc_start - start the RLC ME
5969  *
5970  * @rdev: radeon_device pointer
5971  *
5972  * Unhalt the RLC ME (MicroEngine) (CIK).
5973  */
5974 static void cik_rlc_start(struct radeon_device *rdev)
5975 {
5976 	WREG32(RLC_CNTL, RLC_ENABLE);
5977 
5978 	cik_enable_gui_idle_interrupt(rdev, true);
5979 
5980 	udelay(50);
5981 }
5982 
5983 /**
5984  * cik_rlc_resume - setup the RLC hw
5985  *
5986  * @rdev: radeon_device pointer
5987  *
5988  * Initialize the RLC registers, load the ucode,
5989  * and start the RLC (CIK).
5990  * Returns 0 for success, -EINVAL if the ucode is not available.
5991  */
5992 static int cik_rlc_resume(struct radeon_device *rdev)
5993 {
5994 	u32 i, size, tmp;
5995 
5996 	if (!rdev->rlc_fw)
5997 		return -EINVAL;
5998 
5999 	cik_rlc_stop(rdev);
6000 
6001 	/* disable CG */
6002 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6003 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6004 
6005 	si_rlc_reset(rdev);
6006 
6007 	cik_init_pg(rdev);
6008 
6009 	cik_init_cg(rdev);
6010 
6011 	WREG32(RLC_LB_CNTR_INIT, 0);
6012 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6013 
6014 	mutex_lock(&rdev->grbm_idx_mutex);
6015 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6016 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6017 	WREG32(RLC_LB_PARAMS, 0x00600408);
6018 	WREG32(RLC_LB_CNTL, 0x80000004);
6019 	mutex_unlock(&rdev->grbm_idx_mutex);
6020 
6021 	WREG32(RLC_MC_CNTL, 0);
6022 	WREG32(RLC_UCODE_CNTL, 0);
6023 
6024 	if (rdev->new_fw) {
6025 		const struct rlc_firmware_header_v1_0 *hdr =
6026 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6027 		const __le32 *fw_data = (const __le32 *)
6028 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6029 
6030 		radeon_ucode_print_rlc_hdr(&hdr->header);
6031 
6032 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6033 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6034 		for (i = 0; i < size; i++)
6035 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6036 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6037 	} else {
6038 		const __be32 *fw_data;
6039 
6040 		switch (rdev->family) {
6041 		case CHIP_BONAIRE:
6042 		case CHIP_HAWAII:
6043 		default:
6044 			size = BONAIRE_RLC_UCODE_SIZE;
6045 			break;
6046 		case CHIP_KAVERI:
6047 			size = KV_RLC_UCODE_SIZE;
6048 			break;
6049 		case CHIP_KABINI:
6050 			size = KB_RLC_UCODE_SIZE;
6051 			break;
6052 		case CHIP_MULLINS:
6053 			size = ML_RLC_UCODE_SIZE;
6054 			break;
6055 		}
6056 
6057 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6058 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6059 		for (i = 0; i < size; i++)
6060 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6061 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6062 	}
6063 
6064 	/* XXX - find out what chips support lbpw */
6065 	cik_enable_lbpw(rdev, false);
6066 
6067 	if (rdev->family == CHIP_BONAIRE)
6068 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6069 
6070 	cik_rlc_start(rdev);
6071 
6072 	return 0;
6073 }
6074 
6075 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6076 {
6077 	u32 data, orig, tmp, tmp2;
6078 
6079 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6080 
6081 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6082 		cik_enable_gui_idle_interrupt(rdev, true);
6083 
6084 		tmp = cik_halt_rlc(rdev);
6085 
6086 		mutex_lock(&rdev->grbm_idx_mutex);
6087 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6088 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6089 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6090 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6091 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6092 		mutex_unlock(&rdev->grbm_idx_mutex);
6093 
6094 		cik_update_rlc(rdev, tmp);
6095 
6096 		data |= CGCG_EN | CGLS_EN;
6097 	} else {
6098 		cik_enable_gui_idle_interrupt(rdev, false);
6099 
6100 		RREG32(CB_CGTT_SCLK_CTRL);
6101 		RREG32(CB_CGTT_SCLK_CTRL);
6102 		RREG32(CB_CGTT_SCLK_CTRL);
6103 		RREG32(CB_CGTT_SCLK_CTRL);
6104 
6105 		data &= ~(CGCG_EN | CGLS_EN);
6106 	}
6107 
6108 	if (orig != data)
6109 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6110 
6111 }
6112 
6113 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6114 {
6115 	u32 data, orig, tmp = 0;
6116 
6117 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6118 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6119 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6120 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6121 				data |= CP_MEM_LS_EN;
6122 				if (orig != data)
6123 					WREG32(CP_MEM_SLP_CNTL, data);
6124 			}
6125 		}
6126 
6127 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6128 		data |= 0x00000001;
6129 		data &= 0xfffffffd;
6130 		if (orig != data)
6131 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6132 
6133 		tmp = cik_halt_rlc(rdev);
6134 
6135 		mutex_lock(&rdev->grbm_idx_mutex);
6136 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6137 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6138 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6139 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6140 		WREG32(RLC_SERDES_WR_CTRL, data);
6141 		mutex_unlock(&rdev->grbm_idx_mutex);
6142 
6143 		cik_update_rlc(rdev, tmp);
6144 
6145 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6146 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6147 			data &= ~SM_MODE_MASK;
6148 			data |= SM_MODE(0x2);
6149 			data |= SM_MODE_ENABLE;
6150 			data &= ~CGTS_OVERRIDE;
6151 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6152 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6153 				data &= ~CGTS_LS_OVERRIDE;
6154 			data &= ~ON_MONITOR_ADD_MASK;
6155 			data |= ON_MONITOR_ADD_EN;
6156 			data |= ON_MONITOR_ADD(0x96);
6157 			if (orig != data)
6158 				WREG32(CGTS_SM_CTRL_REG, data);
6159 		}
6160 	} else {
6161 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6162 		data |= 0x00000003;
6163 		if (orig != data)
6164 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6165 
6166 		data = RREG32(RLC_MEM_SLP_CNTL);
6167 		if (data & RLC_MEM_LS_EN) {
6168 			data &= ~RLC_MEM_LS_EN;
6169 			WREG32(RLC_MEM_SLP_CNTL, data);
6170 		}
6171 
6172 		data = RREG32(CP_MEM_SLP_CNTL);
6173 		if (data & CP_MEM_LS_EN) {
6174 			data &= ~CP_MEM_LS_EN;
6175 			WREG32(CP_MEM_SLP_CNTL, data);
6176 		}
6177 
6178 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6179 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6180 		if (orig != data)
6181 			WREG32(CGTS_SM_CTRL_REG, data);
6182 
6183 		tmp = cik_halt_rlc(rdev);
6184 
6185 		mutex_lock(&rdev->grbm_idx_mutex);
6186 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6187 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6188 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6189 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6190 		WREG32(RLC_SERDES_WR_CTRL, data);
6191 		mutex_unlock(&rdev->grbm_idx_mutex);
6192 
6193 		cik_update_rlc(rdev, tmp);
6194 	}
6195 }
6196 
6197 static const u32 mc_cg_registers[] =
6198 {
6199 	MC_HUB_MISC_HUB_CG,
6200 	MC_HUB_MISC_SIP_CG,
6201 	MC_HUB_MISC_VM_CG,
6202 	MC_XPB_CLK_GAT,
6203 	ATC_MISC_CG,
6204 	MC_CITF_MISC_WR_CG,
6205 	MC_CITF_MISC_RD_CG,
6206 	MC_CITF_MISC_VM_CG,
6207 	VM_L2_CG,
6208 };
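
/*
 * cik_enable_mc_ls() and cik_enable_mc_mgcg() below differ only in the
 * cg_flag checked and the bit toggled across this register list. A
 * minimal sketch of the shared read-modify-write pattern;
 * cik_toggle_mc_bit() is hypothetical and not part of this driver:
 */
static void cik_toggle_mc_bit(struct radeon_device *rdev, bool set, u32 bit)
{
	int i;
	u32 orig, data;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		orig = data = RREG32(mc_cg_registers[i]);
		if (set)
			data |= bit;
		else
			data &= ~bit;
		if (data != orig)	/* avoid redundant MMIO writes */
			WREG32(mc_cg_registers[i], data);
	}
}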
6209 
6210 static void cik_enable_mc_ls(struct radeon_device *rdev,
6211 			     bool enable)
6212 {
6213 	int i;
6214 	u32 orig, data;
6215 
6216 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6217 		orig = data = RREG32(mc_cg_registers[i]);
6218 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6219 			data |= MC_LS_ENABLE;
6220 		else
6221 			data &= ~MC_LS_ENABLE;
6222 		if (data != orig)
6223 			WREG32(mc_cg_registers[i], data);
6224 	}
6225 }
6226 
6227 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6228 			       bool enable)
6229 {
6230 	int i;
6231 	u32 orig, data;
6232 
6233 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6234 		orig = data = RREG32(mc_cg_registers[i]);
6235 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6236 			data |= MC_CG_ENABLE;
6237 		else
6238 			data &= ~MC_CG_ENABLE;
6239 		if (data != orig)
6240 			WREG32(mc_cg_registers[i], data);
6241 	}
6242 }
6243 
6244 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6245 				 bool enable)
6246 {
6247 	u32 orig, data;
6248 
6249 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6250 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6251 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6252 	} else {
6253 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6254 		data |= 0xff000000;
6255 		if (data != orig)
6256 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6257 
6258 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6259 		data |= 0xff000000;
6260 		if (data != orig)
6261 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6262 	}
6263 }
6264 
6265 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6266 				 bool enable)
6267 {
6268 	u32 orig, data;
6269 
6270 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6271 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6272 		data |= 0x100;
6273 		if (orig != data)
6274 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6275 
6276 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6277 		data |= 0x100;
6278 		if (orig != data)
6279 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6280 	} else {
6281 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6282 		data &= ~0x100;
6283 		if (orig != data)
6284 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6285 
6286 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6287 		data &= ~0x100;
6288 		if (orig != data)
6289 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6290 	}
6291 }
6292 
6293 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6294 				bool enable)
6295 {
6296 	u32 orig, data;
6297 
6298 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6299 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6300 		data = 0xfff;
6301 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6302 
6303 		orig = data = RREG32(UVD_CGC_CTRL);
6304 		data |= DCM;
6305 		if (orig != data)
6306 			WREG32(UVD_CGC_CTRL, data);
6307 	} else {
6308 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6309 		data &= ~0xfff;
6310 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6311 
6312 		orig = data = RREG32(UVD_CGC_CTRL);
6313 		data &= ~DCM;
6314 		if (orig != data)
6315 			WREG32(UVD_CGC_CTRL, data);
6316 	}
6317 }
6318 
6319 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6320 			       bool enable)
6321 {
6322 	u32 orig, data;
6323 
6324 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6325 
6326 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6327 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6328 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6329 	else
6330 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6331 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6332 
6333 	if (orig != data)
6334 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6335 }
6336 
6337 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6338 				bool enable)
6339 {
6340 	u32 orig, data;
6341 
6342 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6343 
6344 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6345 		data &= ~CLOCK_GATING_DIS;
6346 	else
6347 		data |= CLOCK_GATING_DIS;
6348 
6349 	if (orig != data)
6350 		WREG32(HDP_HOST_PATH_CNTL, data);
6351 }
6352 
6353 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6354 			      bool enable)
6355 {
6356 	u32 orig, data;
6357 
6358 	orig = data = RREG32(HDP_MEM_POWER_LS);
6359 
6360 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6361 		data |= HDP_LS_ENABLE;
6362 	else
6363 		data &= ~HDP_LS_ENABLE;
6364 
6365 	if (orig != data)
6366 		WREG32(HDP_MEM_POWER_LS, data);
6367 }
6368 
6369 void cik_update_cg(struct radeon_device *rdev,
6370 		   u32 block, bool enable)
6371 {
6372 
6373 	if (block & RADEON_CG_BLOCK_GFX) {
6374 		cik_enable_gui_idle_interrupt(rdev, false);
6375 		/* order matters! */
6376 		if (enable) {
6377 			cik_enable_mgcg(rdev, true);
6378 			cik_enable_cgcg(rdev, true);
6379 		} else {
6380 			cik_enable_cgcg(rdev, false);
6381 			cik_enable_mgcg(rdev, false);
6382 		}
6383 		cik_enable_gui_idle_interrupt(rdev, true);
6384 	}
6385 
6386 	if (block & RADEON_CG_BLOCK_MC) {
6387 		if (!(rdev->flags & RADEON_IS_IGP)) {
6388 			cik_enable_mc_mgcg(rdev, enable);
6389 			cik_enable_mc_ls(rdev, enable);
6390 		}
6391 	}
6392 
6393 	if (block & RADEON_CG_BLOCK_SDMA) {
6394 		cik_enable_sdma_mgcg(rdev, enable);
6395 		cik_enable_sdma_mgls(rdev, enable);
6396 	}
6397 
6398 	if (block & RADEON_CG_BLOCK_BIF) {
6399 		cik_enable_bif_mgls(rdev, enable);
6400 	}
6401 
6402 	if (block & RADEON_CG_BLOCK_UVD) {
6403 		if (rdev->has_uvd)
6404 			cik_enable_uvd_mgcg(rdev, enable);
6405 	}
6406 
6407 	if (block & RADEON_CG_BLOCK_HDP) {
6408 		cik_enable_hdp_mgcg(rdev, enable);
6409 		cik_enable_hdp_ls(rdev, enable);
6410 	}
6411 
6412 	if (block & RADEON_CG_BLOCK_VCE) {
6413 		vce_v2_0_enable_mgcg(rdev, enable);
6414 	}
6415 }
6416 
6417 static void cik_init_cg(struct radeon_device *rdev)
6418 {
6419 
6420 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6421 
6422 	if (rdev->has_uvd)
6423 		si_init_uvd_internal_cg(rdev);
6424 
6425 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6426 			     RADEON_CG_BLOCK_SDMA |
6427 			     RADEON_CG_BLOCK_BIF |
6428 			     RADEON_CG_BLOCK_UVD |
6429 			     RADEON_CG_BLOCK_HDP), true);
6430 }
6431 
6432 static void cik_fini_cg(struct radeon_device *rdev)
6433 {
6434 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6435 			     RADEON_CG_BLOCK_SDMA |
6436 			     RADEON_CG_BLOCK_BIF |
6437 			     RADEON_CG_BLOCK_UVD |
6438 			     RADEON_CG_BLOCK_HDP), false);
6439 
6440 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6441 }
6442 
6443 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6444 					  bool enable)
6445 {
6446 	u32 data, orig;
6447 
6448 	orig = data = RREG32(RLC_PG_CNTL);
6449 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6450 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6451 	else
6452 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6453 	if (orig != data)
6454 		WREG32(RLC_PG_CNTL, data);
6455 }
6456 
6457 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6458 					  bool enable)
6459 {
6460 	u32 data, orig;
6461 
6462 	orig = data = RREG32(RLC_PG_CNTL);
6463 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6464 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6465 	else
6466 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6467 	if (orig != data)
6468 		WREG32(RLC_PG_CNTL, data);
6469 }
6470 
6471 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6472 {
6473 	u32 data, orig;
6474 
6475 	orig = data = RREG32(RLC_PG_CNTL);
6476 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6477 		data &= ~DISABLE_CP_PG;
6478 	else
6479 		data |= DISABLE_CP_PG;
6480 	if (orig != data)
6481 		WREG32(RLC_PG_CNTL, data);
6482 }
6483 
6484 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6485 {
6486 	u32 data, orig;
6487 
6488 	orig = data = RREG32(RLC_PG_CNTL);
6489 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6490 		data &= ~DISABLE_GDS_PG;
6491 	else
6492 		data |= DISABLE_GDS_PG;
6493 	if (orig != data)
6494 		WREG32(RLC_PG_CNTL, data);
6495 }
6496 
6497 #define CP_ME_TABLE_SIZE    96
6498 #define CP_ME_TABLE_OFFSET  2048
6499 #define CP_MEC_TABLE_OFFSET 4096
6500 
6501 void cik_init_cp_pg_table(struct radeon_device *rdev)
6502 {
6503 	volatile u32 *dst_ptr;
6504 	int me, i, max_me = 4;
6505 	u32 bo_offset = 0;
6506 	u32 table_offset, table_size;
6507 
6508 	if (rdev->family == CHIP_KAVERI)
6509 		max_me = 5;
6510 
6511 	if (rdev->rlc.cp_table_ptr == NULL)
6512 		return;
6513 
6514 	/* write the cp table buffer */
6515 	dst_ptr = rdev->rlc.cp_table_ptr;
6516 	for (me = 0; me < max_me; me++) {
6517 		if (rdev->new_fw) {
6518 			const __le32 *fw_data;
6519 			const struct gfx_firmware_header_v1_0 *hdr;
6520 
6521 			if (me == 0) {
6522 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6523 				fw_data = (const __le32 *)
6524 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6525 				table_offset = le32_to_cpu(hdr->jt_offset);
6526 				table_size = le32_to_cpu(hdr->jt_size);
6527 			} else if (me == 1) {
6528 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6529 				fw_data = (const __le32 *)
6530 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6531 				table_offset = le32_to_cpu(hdr->jt_offset);
6532 				table_size = le32_to_cpu(hdr->jt_size);
6533 			} else if (me == 2) {
6534 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6535 				fw_data = (const __le32 *)
6536 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6537 				table_offset = le32_to_cpu(hdr->jt_offset);
6538 				table_size = le32_to_cpu(hdr->jt_size);
6539 			} else if (me == 3) {
6540 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6541 				fw_data = (const __le32 *)
6542 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6543 				table_offset = le32_to_cpu(hdr->jt_offset);
6544 				table_size = le32_to_cpu(hdr->jt_size);
6545 			} else {
6546 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6547 				fw_data = (const __le32 *)
6548 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6549 				table_offset = le32_to_cpu(hdr->jt_offset);
6550 				table_size = le32_to_cpu(hdr->jt_size);
6551 			}
6552 
6553 			for (i = 0; i < table_size; i++) {
6554 				dst_ptr[bo_offset + i] =
6555 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6556 			}
6557 			bo_offset += table_size;
6558 		} else {
6559 			const __be32 *fw_data;
6560 			table_size = CP_ME_TABLE_SIZE;
6561 
6562 			if (me == 0) {
6563 				fw_data = (const __be32 *)rdev->ce_fw->data;
6564 				table_offset = CP_ME_TABLE_OFFSET;
6565 			} else if (me == 1) {
6566 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6567 				table_offset = CP_ME_TABLE_OFFSET;
6568 			} else if (me == 2) {
6569 				fw_data = (const __be32 *)rdev->me_fw->data;
6570 				table_offset = CP_ME_TABLE_OFFSET;
6571 			} else {
6572 				fw_data = (const __be32 *)rdev->mec_fw->data;
6573 				table_offset = CP_MEC_TABLE_OFFSET;
6574 			}
6575 
6576 			for (i = 0; i < table_size; i++) {
6577 				dst_ptr[bo_offset + i] =
6578 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6579 			}
6580 			bo_offset += table_size;
6581 		}
6582 	}
6583 }
6584 
6585 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6586 				bool enable)
6587 {
6588 	u32 data, orig;
6589 
6590 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6591 		orig = data = RREG32(RLC_PG_CNTL);
6592 		data |= GFX_PG_ENABLE;
6593 		if (orig != data)
6594 			WREG32(RLC_PG_CNTL, data);
6595 
6596 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6597 		data |= AUTO_PG_EN;
6598 		if (orig != data)
6599 			WREG32(RLC_AUTO_PG_CTRL, data);
6600 	} else {
6601 		orig = data = RREG32(RLC_PG_CNTL);
6602 		data &= ~GFX_PG_ENABLE;
6603 		if (orig != data)
6604 			WREG32(RLC_PG_CNTL, data);
6605 
6606 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6607 		data &= ~AUTO_PG_EN;
6608 		if (orig != data)
6609 			WREG32(RLC_AUTO_PG_CTRL, data);
6610 
6611 		data = RREG32(DB_RENDER_CONTROL);
6612 	}
6613 }
6614 
6615 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6616 {
6617 	u32 mask = 0, tmp, tmp1;
6618 	int i;
6619 
6620 	mutex_lock(&rdev->grbm_idx_mutex);
6621 	cik_select_se_sh(rdev, se, sh);
6622 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6623 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6624 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6625 	mutex_unlock(&rdev->grbm_idx_mutex);
6626 
6627 	tmp &= 0xffff0000;
6628 
6629 	tmp |= tmp1;
6630 	tmp >>= 16;
6631 
6632 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6633 		mask <<= 1;
6634 		mask |= 1;
6635 	}
6636 
6637 	return (~tmp) & mask;
6638 }
6639 
6640 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6641 {
6642 	u32 i, j, k, active_cu_number = 0;
6643 	u32 mask, counter, cu_bitmap;
6644 	u32 tmp = 0;
6645 
6646 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6647 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6648 			mask = 1;
6649 			cu_bitmap = 0;
6650 			counter = 0;
6651 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6652 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6653 					if (counter < 2)
6654 						cu_bitmap |= mask;
6655 					counter++;
6656 				}
6657 				mask <<= 1;
6658 			}
6659 
6660 			active_cu_number += counter;
6661 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6662 		}
6663 	}
6664 
6665 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6666 
6667 	tmp = RREG32(RLC_MAX_PG_CU);
6668 	tmp &= ~MAX_PU_CU_MASK;
6669 	tmp |= MAX_PU_CU(active_cu_number);
6670 	WREG32(RLC_MAX_PG_CU, tmp);
6671 }
6672 
6673 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6674 				       bool enable)
6675 {
6676 	u32 data, orig;
6677 
6678 	orig = data = RREG32(RLC_PG_CNTL);
6679 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6680 		data |= STATIC_PER_CU_PG_ENABLE;
6681 	else
6682 		data &= ~STATIC_PER_CU_PG_ENABLE;
6683 	if (orig != data)
6684 		WREG32(RLC_PG_CNTL, data);
6685 }
6686 
6687 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6688 					bool enable)
6689 {
6690 	u32 data, orig;
6691 
6692 	orig = data = RREG32(RLC_PG_CNTL);
6693 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6694 		data |= DYN_PER_CU_PG_ENABLE;
6695 	else
6696 		data &= ~DYN_PER_CU_PG_ENABLE;
6697 	if (orig != data)
6698 		WREG32(RLC_PG_CNTL, data);
6699 }
6700 
6701 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6702 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6703 
6704 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6705 {
6706 	u32 data, orig;
6707 	u32 i;
6708 
6709 	if (rdev->rlc.cs_data) {
6710 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6711 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6712 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6713 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6714 	} else {
6715 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6716 		for (i = 0; i < 3; i++)
6717 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6718 	}
6719 	if (rdev->rlc.reg_list) {
6720 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6721 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6722 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6723 	}
6724 
6725 	orig = data = RREG32(RLC_PG_CNTL);
6726 	data |= GFX_PG_SRC;
6727 	if (orig != data)
6728 		WREG32(RLC_PG_CNTL, data);
6729 
6730 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6731 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6732 
6733 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6734 	data &= ~IDLE_POLL_COUNT_MASK;
6735 	data |= IDLE_POLL_COUNT(0x60);
6736 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6737 
6738 	data = 0x10101010;
6739 	WREG32(RLC_PG_DELAY, data);
6740 
6741 	data = RREG32(RLC_PG_DELAY_2);
6742 	data &= ~0xff;
6743 	data |= 0x3;
6744 	WREG32(RLC_PG_DELAY_2, data);
6745 
6746 	data = RREG32(RLC_AUTO_PG_CTRL);
6747 	data &= ~GRBM_REG_SGIT_MASK;
6748 	data |= GRBM_REG_SGIT(0x700);
6749 	WREG32(RLC_AUTO_PG_CTRL, data);
6750 
6751 }
6752 
6753 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6754 {
6755 	cik_enable_gfx_cgpg(rdev, enable);
6756 	cik_enable_gfx_static_mgpg(rdev, enable);
6757 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6758 }
6759 
6760 u32 cik_get_csb_size(struct radeon_device *rdev)
6761 {
6762 	u32 count = 0;
6763 	const struct cs_section_def *sect = NULL;
6764 	const struct cs_extent_def *ext = NULL;
6765 
6766 	if (rdev->rlc.cs_data == NULL)
6767 		return 0;
6768 
6769 	/* begin clear state */
6770 	count += 2;
6771 	/* context control state */
6772 	count += 3;
6773 
6774 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6775 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6776 			if (sect->id == SECT_CONTEXT)
6777 				count += 2 + ext->reg_count;
6778 			else
6779 				return 0;
6780 		}
6781 	}
6782 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6783 	count += 4;
6784 	/* end clear state */
6785 	count += 2;
6786 	/* clear state */
6787 	count += 2;
6788 
6789 	return count;
6790 }
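
/*
 * Worked example of the size computed above (a sketch, not driver code):
 * for a cs_data list with a single SECT_CONTEXT extent of N registers,
 * cik_get_csb_buffer() below emits
 *
 *	2 (begin clear state) + 3 (context control) + (2 + N) (set context reg)
 *	+ 4 (pa_sc_raster_config pair) + 2 (end clear state) + 2 (clear state)
 *	= 15 + N dwords,
 *
 * matching this count packet for packet.
 */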
6791 
6792 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6793 {
6794 	u32 count = 0, i;
6795 	const struct cs_section_def *sect = NULL;
6796 	const struct cs_extent_def *ext = NULL;
6797 
6798 	if (rdev->rlc.cs_data == NULL)
6799 		return;
6800 	if (buffer == NULL)
6801 		return;
6802 
6803 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6804 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6805 
6806 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6807 	buffer[count++] = cpu_to_le32(0x80000000);
6808 	buffer[count++] = cpu_to_le32(0x80000000);
6809 
6810 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6811 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6812 			if (sect->id == SECT_CONTEXT) {
6813 				buffer[count++] =
6814 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6815 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6816 				for (i = 0; i < ext->reg_count; i++)
6817 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6818 			} else {
6819 				return;
6820 			}
6821 		}
6822 	}
6823 
6824 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6825 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6826 	switch (rdev->family) {
6827 	case CHIP_BONAIRE:
6828 		buffer[count++] = cpu_to_le32(0x16000012);
6829 		buffer[count++] = cpu_to_le32(0x00000000);
6830 		break;
6831 	case CHIP_KAVERI:
6832 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6833 		buffer[count++] = cpu_to_le32(0x00000000);
6834 		break;
6835 	case CHIP_KABINI:
6836 	case CHIP_MULLINS:
6837 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6838 		buffer[count++] = cpu_to_le32(0x00000000);
6839 		break;
6840 	case CHIP_HAWAII:
6841 		buffer[count++] = cpu_to_le32(0x3a00161a);
6842 		buffer[count++] = cpu_to_le32(0x0000002e);
6843 		break;
6844 	default:
6845 		buffer[count++] = cpu_to_le32(0x00000000);
6846 		buffer[count++] = cpu_to_le32(0x00000000);
6847 		break;
6848 	}
6849 
6850 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6851 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6852 
6853 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6854 	buffer[count++] = cpu_to_le32(0);
6855 }
6856 
6857 static void cik_init_pg(struct radeon_device *rdev)
6858 {
6859 	if (rdev->pg_flags) {
6860 		cik_enable_sck_slowdown_on_pu(rdev, true);
6861 		cik_enable_sck_slowdown_on_pd(rdev, true);
6862 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6863 			cik_init_gfx_cgpg(rdev);
6864 			cik_enable_cp_pg(rdev, true);
6865 			cik_enable_gds_pg(rdev, true);
6866 		}
6867 		cik_init_ao_cu_mask(rdev);
6868 		cik_update_gfx_pg(rdev, true);
6869 	}
6870 }
6871 
6872 static void cik_fini_pg(struct radeon_device *rdev)
6873 {
6874 	if (rdev->pg_flags) {
6875 		cik_update_gfx_pg(rdev, false);
6876 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6877 			cik_enable_cp_pg(rdev, false);
6878 			cik_enable_gds_pg(rdev, false);
6879 		}
6880 	}
6881 }
6882 
6883 /*
6884  * Interrupts
6885  * Starting with r6xx, interrupts are handled via a ring buffer.
6886  * Ring buffers are areas of GPU accessible memory that the GPU
6887  * writes interrupt vectors into and the host reads vectors out of.
6888  * There is a rptr (read pointer) that determines where the
6889  * host is currently reading, and a wptr (write pointer)
6890  * which determines where the GPU has written.  When the
6891  * pointers are equal, the ring is idle.  When the GPU
6892  * writes vectors to the ring buffer, it increments the
6893  * wptr.  When there is an interrupt, the host then starts
6894  * fetching vectors and processing them until the pointers are
6895  * equal again, at which point it updates the rptr.
6896  */
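
/*
 * A minimal sketch of the consumer side of this protocol (illustrative
 * only; fetch_wptr() and handle_vector() are hypothetical stand-ins for
 * cik_get_ih_wptr() and the body of cik_irq_process() below):
 *
 *	u32 rptr = ih->rptr;
 *	u32 wptr = fetch_wptr(ih);		   (where the GPU has written)
 *
 *	while (rptr != wptr) {			   (ring is not idle)
 *		handle_vector(&ih->ring[rptr / 4]);
 *		rptr = (rptr + 16) & ih->ptr_mask; (one 16-byte vector)
 *	}
 *	ih->rptr = rptr;			   (pointers equal: idle again)
 */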
6897 
6898 /**
6899  * cik_enable_interrupts - Enable the interrupt ring buffer
6900  *
6901  * @rdev: radeon_device pointer
6902  *
6903  * Enable the interrupt ring buffer (CIK).
6904  */
6905 static void cik_enable_interrupts(struct radeon_device *rdev)
6906 {
6907 	u32 ih_cntl = RREG32(IH_CNTL);
6908 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6909 
6910 	ih_cntl |= ENABLE_INTR;
6911 	ih_rb_cntl |= IH_RB_ENABLE;
6912 	WREG32(IH_CNTL, ih_cntl);
6913 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6914 	rdev->ih.enabled = true;
6915 }
6916 
6917 /**
6918  * cik_disable_interrupts - Disable the interrupt ring buffer
6919  *
6920  * @rdev: radeon_device pointer
6921  *
6922  * Disable the interrupt ring buffer (CIK).
6923  */
6924 static void cik_disable_interrupts(struct radeon_device *rdev)
6925 {
6926 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6927 	u32 ih_cntl = RREG32(IH_CNTL);
6928 
6929 	ih_rb_cntl &= ~IH_RB_ENABLE;
6930 	ih_cntl &= ~ENABLE_INTR;
6931 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6932 	WREG32(IH_CNTL, ih_cntl);
6933 	/* set rptr, wptr to 0 */
6934 	WREG32(IH_RB_RPTR, 0);
6935 	WREG32(IH_RB_WPTR, 0);
6936 	rdev->ih.enabled = false;
6937 	rdev->ih.rptr = 0;
6938 }
6939 
6940 /**
6941  * cik_disable_interrupt_state - Disable all interrupt sources
6942  *
6943  * @rdev: radeon_device pointer
6944  *
6945  * Clear all interrupt enable bits used by the driver (CIK).
6946  */
6947 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6948 {
6949 	u32 tmp;
6950 
6951 	/* gfx ring */
6952 	tmp = RREG32(CP_INT_CNTL_RING0) &
6953 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6954 	WREG32(CP_INT_CNTL_RING0, tmp);
6955 	/* sdma */
6956 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6957 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6958 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6959 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6960 	/* compute queues */
6961 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6962 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6963 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6964 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6965 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6966 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6967 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6968 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6969 	/* grbm */
6970 	WREG32(GRBM_INT_CNTL, 0);
6971 	/* SRBM */
6972 	WREG32(SRBM_INT_CNTL, 0);
6973 	/* vline/vblank, etc. */
6974 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6975 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6976 	if (rdev->num_crtc >= 4) {
6977 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6978 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6979 	}
6980 	if (rdev->num_crtc >= 6) {
6981 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6982 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6983 	}
6984 	/* pflip */
6985 	if (rdev->num_crtc >= 2) {
6986 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6987 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6988 	}
6989 	if (rdev->num_crtc >= 4) {
6990 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6991 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6992 	}
6993 	if (rdev->num_crtc >= 6) {
6994 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6995 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6996 	}
6997 
6998 	/* dac hotplug */
6999 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7000 
7001 	/* digital hotplug */
7002 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7003 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7004 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7005 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7006 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7007 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7008 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7009 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7010 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7011 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7012 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7013 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7014 
7015 }
7016 
7017 /**
7018  * cik_irq_init - init and enable the interrupt ring
7019  *
7020  * @rdev: radeon_device pointer
7021  *
7022  * Allocate a ring buffer for the interrupt controller,
7023  * enable the RLC, disable interrupts, enable the IH
7024  * enable the RLC, disable interrupts, program the IH
7025  * ring buffer and enable it (CIK).
7026  * Called at device load and resume.
7027  */
7028 static int cik_irq_init(struct radeon_device *rdev)
7029 {
7030 	int ret = 0;
7031 	int rb_bufsz;
7032 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7033 
7034 	/* allocate ring */
7035 	ret = r600_ih_ring_alloc(rdev);
7036 	if (ret)
7037 		return ret;
7038 
7039 	/* disable irqs */
7040 	cik_disable_interrupts(rdev);
7041 
7042 	/* init rlc */
7043 	ret = cik_rlc_resume(rdev);
7044 	if (ret) {
7045 		r600_ih_ring_fini(rdev);
7046 		return ret;
7047 	}
7048 
7049 	/* setup interrupt control */
7050 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7051 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7052 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7053 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7054 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7055 	 */
7056 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7057 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7058 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7059 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7060 
7061 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7062 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7063 
7064 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7065 		      IH_WPTR_OVERFLOW_CLEAR |
7066 		      (rb_bufsz << 1));
7067 
7068 	if (rdev->wb.enabled)
7069 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7070 
7071 	/* set the writeback address whether it's enabled or not */
7072 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7073 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7074 
7075 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7076 
7077 	/* set rptr, wptr to 0 */
7078 	WREG32(IH_RB_RPTR, 0);
7079 	WREG32(IH_RB_WPTR, 0);
7080 
7081 	/* Default settings for IH_CNTL (disabled at first) */
7082 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7083 	/* RPTR_REARM only works if msi's are enabled */
7084 	if (rdev->msi_enabled)
7085 		ih_cntl |= RPTR_REARM;
7086 	WREG32(IH_CNTL, ih_cntl);
7087 
7088 	/* force the active interrupt state to all disabled */
7089 	cik_disable_interrupt_state(rdev);
7090 
7091 	pci_set_master(rdev->pdev);
7092 
7093 	/* enable irqs */
7094 	cik_enable_interrupts(rdev);
7095 
7096 	return ret;
7097 }
7098 
7099 /**
7100  * cik_irq_set - enable/disable interrupt sources
7101  *
7102  * @rdev: radeon_device pointer
7103  *
7104  * Enable interrupt sources on the GPU (vblanks, hpd,
7105  * etc.) (CIK).
7106  * Returns 0 for success, errors for failure.
7107  */
7108 int cik_irq_set(struct radeon_device *rdev)
7109 {
7110 	u32 cp_int_cntl;
7111 	u32 cp_m1p0;
7112 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7113 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7114 	u32 grbm_int_cntl = 0;
7115 	u32 dma_cntl, dma_cntl1;
7116 
7117 	if (!rdev->irq.installed) {
7118 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7119 		return -EINVAL;
7120 	}
7121 	/* don't enable anything if the ih is disabled */
7122 	if (!rdev->ih.enabled) {
7123 		cik_disable_interrupts(rdev);
7124 		/* force the active interrupt state to all disabled */
7125 		cik_disable_interrupt_state(rdev);
7126 		return 0;
7127 	}
7128 
7129 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7130 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7131 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7132 
7133 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7134 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7135 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7136 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7137 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7138 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7139 
7140 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7141 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7142 
7143 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7144 
7145 	/* enable CP interrupts on all rings */
7146 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7147 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7148 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7149 	}
7150 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7151 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7152 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7153 		if (ring->me == 1) {
7154 			switch (ring->pipe) {
7155 			case 0:
7156 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7157 				break;
7158 			default:
7159 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7160 				break;
7161 			}
7162 		} else {
7163 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7164 		}
7165 	}
7166 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7167 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7168 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7169 		if (ring->me == 1) {
7170 			switch (ring->pipe) {
7171 			case 0:
7172 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7173 				break;
7174 			default:
7175 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7176 				break;
7177 			}
7178 		} else {
7179 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7180 		}
7181 	}
7182 
7183 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7184 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7185 		dma_cntl |= TRAP_ENABLE;
7186 	}
7187 
7188 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7189 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7190 		dma_cntl1 |= TRAP_ENABLE;
7191 	}
7192 
7193 	if (rdev->irq.crtc_vblank_int[0] ||
7194 	    atomic_read(&rdev->irq.pflip[0])) {
7195 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7196 		crtc1 |= VBLANK_INTERRUPT_MASK;
7197 	}
7198 	if (rdev->irq.crtc_vblank_int[1] ||
7199 	    atomic_read(&rdev->irq.pflip[1])) {
7200 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7201 		crtc2 |= VBLANK_INTERRUPT_MASK;
7202 	}
7203 	if (rdev->irq.crtc_vblank_int[2] ||
7204 	    atomic_read(&rdev->irq.pflip[2])) {
7205 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7206 		crtc3 |= VBLANK_INTERRUPT_MASK;
7207 	}
7208 	if (rdev->irq.crtc_vblank_int[3] ||
7209 	    atomic_read(&rdev->irq.pflip[3])) {
7210 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7211 		crtc4 |= VBLANK_INTERRUPT_MASK;
7212 	}
7213 	if (rdev->irq.crtc_vblank_int[4] ||
7214 	    atomic_read(&rdev->irq.pflip[4])) {
7215 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7216 		crtc5 |= VBLANK_INTERRUPT_MASK;
7217 	}
7218 	if (rdev->irq.crtc_vblank_int[5] ||
7219 	    atomic_read(&rdev->irq.pflip[5])) {
7220 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7221 		crtc6 |= VBLANK_INTERRUPT_MASK;
7222 	}
7223 	if (rdev->irq.hpd[0]) {
7224 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7225 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226 	}
7227 	if (rdev->irq.hpd[1]) {
7228 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7229 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7230 	}
7231 	if (rdev->irq.hpd[2]) {
7232 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7233 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7234 	}
7235 	if (rdev->irq.hpd[3]) {
7236 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7237 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7238 	}
7239 	if (rdev->irq.hpd[4]) {
7240 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7241 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7242 	}
7243 	if (rdev->irq.hpd[5]) {
7244 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7245 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7246 	}
7247 
7248 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7249 
7250 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7251 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7252 
7253 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7254 
7255 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7256 
7257 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7258 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7259 	if (rdev->num_crtc >= 4) {
7260 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7261 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7262 	}
7263 	if (rdev->num_crtc >= 6) {
7264 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7265 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7266 	}
7267 
7268 	if (rdev->num_crtc >= 2) {
7269 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7270 		       GRPH_PFLIP_INT_MASK);
7271 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7272 		       GRPH_PFLIP_INT_MASK);
7273 	}
7274 	if (rdev->num_crtc >= 4) {
7275 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7276 		       GRPH_PFLIP_INT_MASK);
7277 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7278 		       GRPH_PFLIP_INT_MASK);
7279 	}
7280 	if (rdev->num_crtc >= 6) {
7281 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7282 		       GRPH_PFLIP_INT_MASK);
7283 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7284 		       GRPH_PFLIP_INT_MASK);
7285 	}
7286 
7287 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7288 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7289 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7290 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7291 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7292 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7293 
7294 	/* posting read */
7295 	RREG32(SRBM_STATUS);
7296 
7297 	return 0;
7298 }
7299 
7300 /**
7301  * cik_irq_ack - ack interrupt sources
7302  *
7303  * @rdev: radeon_device pointer
7304  *
7305  * Ack interrupt sources on the GPU (vblanks, hpd,
7306  * etc.) (CIK).  Certain interrupt sources are sw
7307  * generated and do not require an explicit ack.
7308  */
7309 static inline void cik_irq_ack(struct radeon_device *rdev)
7310 {
7311 	u32 tmp;
7312 
7313 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7314 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7315 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7316 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7317 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7318 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7319 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7320 
7321 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7322 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7323 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7324 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7325 	if (rdev->num_crtc >= 4) {
7326 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7327 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7328 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7329 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7330 	}
7331 	if (rdev->num_crtc >= 6) {
7332 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7333 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7334 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7335 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7336 	}
7337 
7338 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7339 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7340 		       GRPH_PFLIP_INT_CLEAR);
7341 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7342 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7343 		       GRPH_PFLIP_INT_CLEAR);
7344 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7345 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7346 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7347 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7348 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7349 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7350 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7351 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7352 
7353 	if (rdev->num_crtc >= 4) {
7354 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7355 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7356 			       GRPH_PFLIP_INT_CLEAR);
7357 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7358 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7359 			       GRPH_PFLIP_INT_CLEAR);
7360 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7361 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7362 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7363 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7364 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7365 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7366 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7367 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7368 	}
7369 
7370 	if (rdev->num_crtc >= 6) {
7371 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7372 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7373 			       GRPH_PFLIP_INT_CLEAR);
7374 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7375 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7376 			       GRPH_PFLIP_INT_CLEAR);
7377 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7378 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7379 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7380 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7381 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7382 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7383 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7384 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7385 	}
7386 
7387 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7388 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7389 		tmp |= DC_HPDx_INT_ACK;
7390 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7391 	}
7392 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7393 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7394 		tmp |= DC_HPDx_INT_ACK;
7395 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7396 	}
7397 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7398 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7399 		tmp |= DC_HPDx_INT_ACK;
7400 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7401 	}
7402 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7403 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7404 		tmp |= DC_HPDx_INT_ACK;
7405 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7406 	}
7407 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7408 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7409 		tmp |= DC_HPDx_INT_ACK;
7410 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7411 	}
7412 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7413 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7414 		tmp |= DC_HPDx_INT_ACK;
7415 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7416 	}
7417 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7418 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7419 		tmp |= DC_HPDx_RX_INT_ACK;
7420 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7421 	}
7422 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7423 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7424 		tmp |= DC_HPDx_RX_INT_ACK;
7425 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7426 	}
7427 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7428 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7429 		tmp |= DC_HPDx_RX_INT_ACK;
7430 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7431 	}
7432 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7433 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7434 		tmp |= DC_HPDx_RX_INT_ACK;
7435 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7436 	}
7437 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7438 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7439 		tmp |= DC_HPDx_RX_INT_ACK;
7440 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7441 	}
7442 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7443 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7444 		tmp |= DC_HPDx_RX_INT_ACK;
7445 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7446 	}
7447 }
7448 
7449 /**
7450  * cik_irq_disable - disable interrupts
7451  *
7452  * @rdev: radeon_device pointer
7453  *
7454  * Disable interrupts on the hw (CIK).
7455  */
7456 static void cik_irq_disable(struct radeon_device *rdev)
7457 {
7458 	cik_disable_interrupts(rdev);
7459 	/* Wait and acknowledge irq */
7460 	mdelay(1);
7461 	cik_irq_ack(rdev);
7462 	cik_disable_interrupt_state(rdev);
7463 }
7464 
7465 /**
7466  * cik_irq_suspend - disable interrupts for suspend
7467  *
7468  * @rdev: radeon_device pointer
7469  *
7470  * Disable interrupts and stop the RLC (CIK).
7471  * Used for suspend.
7472  */
7473 static void cik_irq_suspend(struct radeon_device *rdev)
7474 {
7475 	cik_irq_disable(rdev);
7476 	cik_rlc_stop(rdev);
7477 }
7478 
7479 /**
7480  * cik_irq_fini - tear down interrupt support
7481  *
7482  * @rdev: radeon_device pointer
7483  *
7484  * Disable interrupts on the hw and free the IH ring
7485  * buffer (CIK).
7486  * Used for driver unload.
7487  */
7488 static void cik_irq_fini(struct radeon_device *rdev)
7489 {
7490 	cik_irq_suspend(rdev);
7491 	r600_ih_ring_fini(rdev);
7492 }
7493 
7494 /**
7495  * cik_get_ih_wptr - get the IH ring buffer wptr
7496  *
7497  * @rdev: radeon_device pointer
7498  *
7499  * Get the IH ring buffer wptr from either the register
7500  * or the writeback memory buffer (CIK).  Also check for
7501  * ring buffer overflow and deal with it.
7502  * Used by cik_irq_process().
7503  * Returns the value of the wptr.
7504  */
7505 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7506 {
7507 	u32 wptr, tmp;
7508 
7509 	if (rdev->wb.enabled)
7510 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7511 	else
7512 		wptr = RREG32(IH_RB_WPTR);
7513 
7514 	if (wptr & RB_OVERFLOW) {
7515 		wptr &= ~RB_OVERFLOW;
7516 		/* When a ring buffer overflow happens, start parsing interrupts
7517 		 * from the last vector that was not overwritten (wptr + 16).
7518 		 * Hopefully this allows us to catch up.
7519 		 */
7520 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7521 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7522 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7523 		tmp = RREG32(IH_RB_CNTL);
7524 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7525 		WREG32(IH_RB_CNTL, tmp);
7526 	}
7527 	return (wptr & rdev->ih.ptr_mask);
7528 }
7529 
7530 /* CIK IV Ring
7531  * Each IV ring entry is 128 bits:
7532  * [7:0]    - interrupt source id
7533  * [31:8]   - reserved
7534  * [59:32]  - interrupt source data
7535  * [63:60]  - reserved
7536  * [71:64]  - RINGID
7537  *            CP:
7538  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7539  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7540  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7541  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7542  *            PIPE_ID - ME0 0=3D
7543  *                    - ME1&2 compute dispatcher (4 pipes each)
7544  *            SDMA:
7545  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7546  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7547  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7548  * [79:72]  - VMID
7549  * [95:80]  - PASID
7550  * [127:96] - reserved
7551  */
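/*
 * Illustrative decode of one IV entry under the layout above (a sketch;
 * the dword handling mirrors cik_irq_process() below):
 *
 *	u32 dw0 = le32_to_cpu(ring[i + 0]);
 *	u32 dw1 = le32_to_cpu(ring[i + 1]);
 *	u32 dw2 = le32_to_cpu(ring[i + 2]);
 *
 *	src_id   = dw0 & 0xff;			bits [7:0]
 *	src_data = dw1 & 0xfffffff;		bits [59:32]
 *	ring_id  = dw2 & 0xff;			RINGID, bits [71:64]
 *	vmid     = (dw2 >> 8) & 0xff;		bits [79:72]
 *	pasid    = (dw2 >> 16) & 0xffff;	bits [95:80]
 */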
7552 /**
7553  * cik_irq_process - interrupt handler
7554  *
7555  * @rdev: radeon_device pointer
7556  *
7557  * Interrupt handler (CIK).  Walk the IH ring,
7558  * ack interrupts and schedule work to handle
7559  * interrupt events.
7560  * Returns irq process return code.
7561  */
7562 int cik_irq_process(struct radeon_device *rdev)
7563 {
7564 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7565 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7566 	u32 wptr;
7567 	u32 rptr;
7568 	u32 src_id, src_data, ring_id;
7569 	u8 me_id, pipe_id, queue_id;
7570 	u32 ring_index;
7571 	bool queue_hotplug = false;
7572 	bool queue_dp = false;
7573 	bool queue_reset = false;
7574 	u32 addr, status, mc_client;
7575 	bool queue_thermal = false;
7576 
7577 	if (!rdev->ih.enabled || rdev->shutdown)
7578 		return IRQ_NONE;
7579 
7580 	wptr = cik_get_ih_wptr(rdev);
7581 
7582 restart_ih:
7583 	/* is somebody else already processing irqs? */
7584 	if (atomic_xchg(&rdev->ih.lock, 1))
7585 		return IRQ_NONE;
7586 
7587 	rptr = rdev->ih.rptr;
7588 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7589 
7590 	/* Order reading of wptr vs. reading of IH ring data */
7591 	rmb();
7592 
7593 	/* display interrupts */
7594 	cik_irq_ack(rdev);
7595 
7596 	while (rptr != wptr) {
7597 		/* wptr/rptr are in bytes! */
7598 		ring_index = rptr / 4;
7599 
7600 		radeon_kfd_interrupt(rdev,
7601 				(const void *) &rdev->ih.ring[ring_index]);
7602 
7603 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7604 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7605 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7606 
7607 		switch (src_id) {
7608 		case 1: /* D1 vblank/vline */
7609 			switch (src_data) {
7610 			case 0: /* D1 vblank */
7611 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7612 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7613 
7614 				if (rdev->irq.crtc_vblank_int[0]) {
7615 					drm_handle_vblank(rdev->ddev, 0);
7616 					rdev->pm.vblank_sync = true;
7617 					wake_up(&rdev->irq.vblank_queue);
7618 				}
7619 				if (atomic_read(&rdev->irq.pflip[0]))
7620 					radeon_crtc_handle_vblank(rdev, 0);
7621 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7622 				DRM_DEBUG("IH: D1 vblank\n");
7623 
7624 				break;
7625 			case 1: /* D1 vline */
7626 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7627 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7628 
7629 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7630 				DRM_DEBUG("IH: D1 vline\n");
7631 
7632 				break;
7633 			default:
7634 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7635 				break;
7636 			}
7637 			break;
7638 		case 2: /* D2 vblank/vline */
7639 			switch (src_data) {
7640 			case 0: /* D2 vblank */
7641 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7642 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7643 
7644 				if (rdev->irq.crtc_vblank_int[1]) {
7645 					drm_handle_vblank(rdev->ddev, 1);
7646 					rdev->pm.vblank_sync = true;
7647 					wake_up(&rdev->irq.vblank_queue);
7648 				}
7649 				if (atomic_read(&rdev->irq.pflip[1]))
7650 					radeon_crtc_handle_vblank(rdev, 1);
7651 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7652 				DRM_DEBUG("IH: D2 vblank\n");
7653 
7654 				break;
7655 			case 1: /* D2 vline */
7656 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7657 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7658 
7659 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7660 				DRM_DEBUG("IH: D2 vline\n");
7661 
7662 				break;
7663 			default:
7664 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7665 				break;
7666 			}
7667 			break;
7668 		case 3: /* D3 vblank/vline */
7669 			switch (src_data) {
7670 			case 0: /* D3 vblank */
7671 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7672 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7673 
7674 				if (rdev->irq.crtc_vblank_int[2]) {
7675 					drm_handle_vblank(rdev->ddev, 2);
7676 					rdev->pm.vblank_sync = true;
7677 					wake_up(&rdev->irq.vblank_queue);
7678 				}
7679 				if (atomic_read(&rdev->irq.pflip[2]))
7680 					radeon_crtc_handle_vblank(rdev, 2);
7681 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7682 				DRM_DEBUG("IH: D3 vblank\n");
7683 
7684 				break;
7685 			case 1: /* D3 vline */
7686 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7687 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7688 
7689 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7690 				DRM_DEBUG("IH: D3 vline\n");
7691 
7692 				break;
7693 			default:
7694 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7695 				break;
7696 			}
7697 			break;
7698 		case 4: /* D4 vblank/vline */
7699 			switch (src_data) {
7700 			case 0: /* D4 vblank */
7701 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7702 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7703 
7704 				if (rdev->irq.crtc_vblank_int[3]) {
7705 					drm_handle_vblank(rdev->ddev, 3);
7706 					rdev->pm.vblank_sync = true;
7707 					wake_up(&rdev->irq.vblank_queue);
7708 				}
7709 				if (atomic_read(&rdev->irq.pflip[3]))
7710 					radeon_crtc_handle_vblank(rdev, 3);
7711 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7712 				DRM_DEBUG("IH: D4 vblank\n");
7713 
7714 				break;
7715 			case 1: /* D4 vline */
7716 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7717 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7718 
7719 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7720 				DRM_DEBUG("IH: D4 vline\n");
7721 
7722 				break;
7723 			default:
7724 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7725 				break;
7726 			}
7727 			break;
7728 		case 5: /* D5 vblank/vline */
7729 			switch (src_data) {
7730 			case 0: /* D5 vblank */
7731 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7732 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7733 
7734 				if (rdev->irq.crtc_vblank_int[4]) {
7735 					drm_handle_vblank(rdev->ddev, 4);
7736 					rdev->pm.vblank_sync = true;
7737 					wake_up(&rdev->irq.vblank_queue);
7738 				}
7739 				if (atomic_read(&rdev->irq.pflip[4]))
7740 					radeon_crtc_handle_vblank(rdev, 4);
7741 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7742 				DRM_DEBUG("IH: D5 vblank\n");
7743 
7744 				break;
7745 			case 1: /* D5 vline */
7746 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7747 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7748 
7749 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7750 				DRM_DEBUG("IH: D5 vline\n");
7751 
7752 				break;
7753 			default:
7754 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7755 				break;
7756 			}
7757 			break;
7758 		case 6: /* D6 vblank/vline */
7759 			switch (src_data) {
7760 			case 0: /* D6 vblank */
7761 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7762 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7763 
7764 				if (rdev->irq.crtc_vblank_int[5]) {
7765 					drm_handle_vblank(rdev->ddev, 5);
7766 					rdev->pm.vblank_sync = true;
7767 					wake_up(&rdev->irq.vblank_queue);
7768 				}
7769 				if (atomic_read(&rdev->irq.pflip[5]))
7770 					radeon_crtc_handle_vblank(rdev, 5);
7771 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7772 				DRM_DEBUG("IH: D6 vblank\n");
7773 
7774 				break;
7775 			case 1: /* D6 vline */
7776 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7777 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7778 
7779 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7780 				DRM_DEBUG("IH: D6 vline\n");
7781 
7782 				break;
7783 			default:
7784 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7785 				break;
7786 			}
7787 			break;
7788 		case 8: /* D1 page flip */
7789 		case 10: /* D2 page flip */
7790 		case 12: /* D3 page flip */
7791 		case 14: /* D4 page flip */
7792 		case 16: /* D5 page flip */
7793 		case 18: /* D6 page flip */
7794 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7795 			if (radeon_use_pflipirq > 0)
7796 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7797 			break;
7798 		case 42: /* HPD hotplug */
7799 			switch (src_data) {
7800 			case 0:
7801 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7802 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803 
7804 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7805 				queue_hotplug = true;
7806 				DRM_DEBUG("IH: HPD1\n");
7807 
7808 				break;
7809 			case 1:
7810 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7811 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812 
7813 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7814 				queue_hotplug = true;
7815 				DRM_DEBUG("IH: HPD2\n");
7816 
7817 				break;
7818 			case 2:
7819 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7820 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821 
7822 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7823 				queue_hotplug = true;
7824 				DRM_DEBUG("IH: HPD3\n");
7825 
7826 				break;
7827 			case 3:
7828 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7829 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830 
7831 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7832 				queue_hotplug = true;
7833 				DRM_DEBUG("IH: HPD4\n");
7834 
7835 				break;
7836 			case 4:
7837 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7838 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839 
7840 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7841 				queue_hotplug = true;
7842 				DRM_DEBUG("IH: HPD5\n");
7843 
7844 				break;
7845 			case 5:
7846 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7847 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848 
7849 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7850 				queue_hotplug = true;
7851 				DRM_DEBUG("IH: HPD6\n");
7852 
7853 				break;
7854 			case 6:
7855 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7856 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857 
7858 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7859 				queue_dp = true;
7860 				DRM_DEBUG("IH: HPD_RX 1\n");
7861 
7862 				break;
7863 			case 7:
7864 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7865 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866 
7867 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7868 				queue_dp = true;
7869 				DRM_DEBUG("IH: HPD_RX 2\n");
7870 
7871 				break;
7872 			case 8:
7873 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7874 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875 
7876 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7877 				queue_dp = true;
7878 				DRM_DEBUG("IH: HPD_RX 3\n");
7879 
7880 				break;
7881 			case 9:
7882 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7883 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7884 
7885 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7886 				queue_dp = true;
7887 				DRM_DEBUG("IH: HPD_RX 4\n");
7888 
7889 				break;
7890 			case 10:
7891 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7892 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7893 
7894 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7895 				queue_dp = true;
7896 				DRM_DEBUG("IH: HPD_RX 5\n");
7897 
7898 				break;
7899 			case 11:
7900 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7901 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7902 
7903 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7904 				queue_dp = true;
7905 				DRM_DEBUG("IH: HPD_RX 6\n");
7906 
7907 				break;
7908 			default:
7909 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7910 				break;
7911 			}
7912 			break;
7913 		case 96:
7914 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7915 			WREG32(SRBM_INT_ACK, 0x1);
7916 			break;
7917 		case 124: /* UVD */
7918 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7919 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7920 			break;
7921 		case 146:
7922 		case 147:
7923 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7924 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7925 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7926 			/* reset addr and status */
7927 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7928 			if (addr == 0x0 && status == 0x0)
7929 				break;
7930 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7931 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7932 				addr);
7933 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7934 				status);
7935 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7936 			break;
7937 		case 167: /* VCE */
7938 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7939 			switch (src_data) {
7940 			case 0:
7941 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7942 				break;
7943 			case 1:
7944 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7945 				break;
7946 			default:
7947 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7948 				break;
7949 			}
7950 			break;
7951 		case 176: /* GFX RB CP_INT */
7952 		case 177: /* GFX IB CP_INT */
7953 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7954 			break;
7955 		case 181: /* CP EOP event */
7956 			DRM_DEBUG("IH: CP EOP\n");
7957 			/* XXX check the bitfield order! */
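			/* RINGID decode, per the IV ring layout documented above:
			 * bits [6:5] = ME_ID, [4:3] = PIPE_ID, [2:0] = QUEUE_ID
			 */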
7958 			me_id = (ring_id & 0x60) >> 5;
7959 			pipe_id = (ring_id & 0x18) >> 3;
7960 			queue_id = (ring_id & 0x7) >> 0;
7961 			switch (me_id) {
7962 			case 0:
7963 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7964 				break;
7965 			case 1:
7966 			case 2:
7967 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7968 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7969 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7970 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7971 				break;
7972 			}
7973 			break;
7974 		case 184: /* CP Privileged reg access */
7975 			DRM_ERROR("Illegal register access in command stream\n");
7976 			/* XXX check the bitfield order! */
7977 			me_id = (ring_id & 0x60) >> 5;
7978 			pipe_id = (ring_id & 0x18) >> 3;
7979 			queue_id = (ring_id & 0x7) >> 0;
7980 			switch (me_id) {
7981 			case 0:
7982 				/* This results in a full GPU reset, but all we need to do is soft
7983 				 * reset the CP for gfx
7984 				 */
7985 				queue_reset = true;
7986 				break;
7987 			case 1:
7988 				/* XXX compute */
7989 				queue_reset = true;
7990 				break;
7991 			case 2:
7992 				/* XXX compute */
7993 				queue_reset = true;
7994 				break;
7995 			}
7996 			break;
7997 		case 185: /* CP Privileged inst */
7998 			DRM_ERROR("Illegal instruction in command stream\n");
7999 			/* XXX check the bitfield order! */
8000 			me_id = (ring_id & 0x60) >> 5;
8001 			pipe_id = (ring_id & 0x18) >> 3;
8002 			queue_id = (ring_id & 0x7) >> 0;
8003 			switch (me_id) {
8004 			case 0:
8005 				/* This results in a full GPU reset, but all we need to do is soft
8006 				 * reset the CP for gfx
8007 				 */
8008 				queue_reset = true;
8009 				break;
8010 			case 1:
8011 				/* XXX compute */
8012 				queue_reset = true;
8013 				break;
8014 			case 2:
8015 				/* XXX compute */
8016 				queue_reset = true;
8017 				break;
8018 			}
8019 			break;
8020 		case 224: /* SDMA trap event */
8021 			/* XXX check the bitfield order! */
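			/* SDMA RINGID decode, per the IV ring layout above:
			 * bits [1:0] = INSTANCE_ID, [3:2] = QUEUE_ID
			 */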
8022 			me_id = (ring_id & 0x3) >> 0;
8023 			queue_id = (ring_id & 0xc) >> 2;
8024 			DRM_DEBUG("IH: SDMA trap\n");
8025 			switch (me_id) {
8026 			case 0:
8027 				switch (queue_id) {
8028 				case 0:
8029 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8030 					break;
8031 				case 1:
8032 					/* XXX compute */
8033 					break;
8034 				case 2:
8035 					/* XXX compute */
8036 					break;
8037 				}
8038 				break;
8039 			case 1:
8040 				switch (queue_id) {
8041 				case 0:
8042 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8043 					break;
8044 				case 1:
8045 					/* XXX compute */
8046 					break;
8047 				case 2:
8048 					/* XXX compute */
8049 					break;
8050 				}
8051 				break;
8052 			}
8053 			break;
8054 		case 230: /* thermal low to high */
8055 			DRM_DEBUG("IH: thermal low to high\n");
8056 			rdev->pm.dpm.thermal.high_to_low = false;
8057 			queue_thermal = true;
8058 			break;
8059 		case 231: /* thermal high to low */
8060 			DRM_DEBUG("IH: thermal high to low\n");
8061 			rdev->pm.dpm.thermal.high_to_low = true;
8062 			queue_thermal = true;
8063 			break;
8064 		case 233: /* GUI IDLE */
8065 			DRM_DEBUG("IH: GUI idle\n");
8066 			break;
8067 		case 241: /* SDMA Privileged inst */
8068 		case 247: /* SDMA Privileged inst */
8069 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8070 			/* XXX check the bitfield order! */
8071 			me_id = (ring_id & 0x3) >> 0;
8072 			queue_id = (ring_id & 0xc) >> 2;
8073 			switch (me_id) {
8074 			case 0:
8075 				switch (queue_id) {
8076 				case 0:
8077 					queue_reset = true;
8078 					break;
8079 				case 1:
8080 					/* XXX compute */
8081 					queue_reset = true;
8082 					break;
8083 				case 2:
8084 					/* XXX compute */
8085 					queue_reset = true;
8086 					break;
8087 				}
8088 				break;
8089 			case 1:
8090 				switch (queue_id) {
8091 				case 0:
8092 					queue_reset = true;
8093 					break;
8094 				case 1:
8095 					/* XXX compute */
8096 					queue_reset = true;
8097 					break;
8098 				case 2:
8099 					/* XXX compute */
8100 					queue_reset = true;
8101 					break;
8102 				}
8103 				break;
8104 			}
8105 			break;
8106 		default:
8107 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8108 			break;
8109 		}
8110 
8111 		/* wptr/rptr are in bytes! */
8112 		rptr += 16;
8113 		rptr &= rdev->ih.ptr_mask;
8114 		WREG32(IH_RB_RPTR, rptr);
8115 	}
8116 	if (queue_dp)
8117 		schedule_work(&rdev->dp_work);
8118 	if (queue_hotplug)
8119 		schedule_delayed_work(&rdev->hotplug_work, 0);
8120 	if (queue_reset) {
8121 		rdev->needs_reset = true;
8122 		wake_up_all(&rdev->fence_queue);
8123 	}
8124 	if (queue_thermal)
8125 		schedule_work(&rdev->pm.dpm.thermal.work);
8126 	rdev->ih.rptr = rptr;
8127 	atomic_set(&rdev->ih.lock, 0);
8128 
8129 	/* make sure wptr hasn't changed while processing */
8130 	wptr = cik_get_ih_wptr(rdev);
8131 	if (wptr != rptr)
8132 		goto restart_ih;
8133 
8134 	return IRQ_HANDLED;
8135 }
8136 
8137 /*
8138  * startup/shutdown callbacks
8139  */
8140 /**
8141  * cik_startup - program the asic to a functional state
8142  *
8143  * @rdev: radeon_device pointer
8144  *
8145  * Programs the asic to a functional state (CIK).
8146  * Called by cik_init() and cik_resume().
8147  * Returns 0 for success, error for failure.
8148  */
8149 static int cik_startup(struct radeon_device *rdev)
8150 {
8151 	struct radeon_ring *ring;
8152 	u32 nop;
8153 	int r;
8154 
8155 	/* enable pcie gen2/3 link */
8156 	cik_pcie_gen3_enable(rdev);
8157 	/* enable aspm */
8158 	cik_program_aspm(rdev);
8159 
8160 	/* scratch needs to be initialized before MC */
8161 	r = r600_vram_scratch_init(rdev);
8162 	if (r)
8163 		return r;
8164 
8165 	cik_mc_program(rdev);
8166 
8167 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8168 		r = ci_mc_load_microcode(rdev);
8169 		if (r) {
8170 			DRM_ERROR("Failed to load MC firmware!\n");
8171 			return r;
8172 		}
8173 	}
8174 
8175 	r = cik_pcie_gart_enable(rdev);
8176 	if (r)
8177 		return r;
8178 	cik_gpu_init(rdev);
8179 
8180 	/* allocate rlc buffers */
8181 	if (rdev->flags & RADEON_IS_IGP) {
8182 		if (rdev->family == CHIP_KAVERI) {
8183 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8184 			rdev->rlc.reg_list_size =
8185 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8186 		} else {
8187 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8188 			rdev->rlc.reg_list_size =
8189 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8190 		}
8191 	}
8192 	rdev->rlc.cs_data = ci_cs_data;
8193 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8194 	r = sumo_rlc_init(rdev);
8195 	if (r) {
8196 		DRM_ERROR("Failed to init rlc BOs!\n");
8197 		return r;
8198 	}
8199 
8200 	/* allocate wb buffer */
8201 	r = radeon_wb_init(rdev);
8202 	if (r)
8203 		return r;
8204 
8205 	/* allocate mec buffers */
8206 	r = cik_mec_init(rdev);
8207 	if (r) {
8208 		DRM_ERROR("Failed to init MEC BOs!\n");
8209 		return r;
8210 	}
8211 
8212 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8213 	if (r) {
8214 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8215 		return r;
8216 	}
8217 
8218 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8219 	if (r) {
8220 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8221 		return r;
8222 	}
8223 
8224 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8225 	if (r) {
8226 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8227 		return r;
8228 	}
8229 
8230 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8231 	if (r) {
8232 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8233 		return r;
8234 	}
8235 
8236 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8237 	if (r) {
8238 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8239 		return r;
8240 	}
8241 
8242 	r = radeon_uvd_resume(rdev);
8243 	if (!r) {
8244 		r = uvd_v4_2_resume(rdev);
8245 		if (!r) {
8246 			r = radeon_fence_driver_start_ring(rdev,
8247 							   R600_RING_TYPE_UVD_INDEX);
8248 			if (r)
8249 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8250 		}
8251 	}
8252 	if (r)
8253 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8254 
8255 	r = radeon_vce_resume(rdev);
8256 	if (!r) {
8257 		r = vce_v2_0_resume(rdev);
8258 		if (!r)
8259 			r = radeon_fence_driver_start_ring(rdev,
8260 							   TN_RING_TYPE_VCE1_INDEX);
8261 		if (!r)
8262 			r = radeon_fence_driver_start_ring(rdev,
8263 							   TN_RING_TYPE_VCE2_INDEX);
8264 	}
8265 	if (r) {
8266 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8267 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8268 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8269 	}
8270 
8271 	/* Enable IRQ */
8272 	if (!rdev->irq.installed) {
8273 		r = radeon_irq_kms_init(rdev);
8274 		if (r)
8275 			return r;
8276 	}
8277 
8278 	r = cik_irq_init(rdev);
8279 	if (r) {
8280 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8281 		radeon_irq_kms_fini(rdev);
8282 		return r;
8283 	}
8284 	cik_irq_set(rdev);
8285 
8286 	if (rdev->family == CHIP_HAWAII) {
8287 		if (rdev->new_fw)
8288 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8289 		else
8290 			nop = RADEON_CP_PACKET2;
8291 	} else {
8292 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8293 	}
8294 
8295 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8296 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8297 			     nop);
8298 	if (r)
8299 		return r;
8300 
8301 	/* set up the compute queues */
8302 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8303 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8304 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8305 			     nop);
8306 	if (r)
8307 		return r;
8308 	ring->me = 1; /* first MEC */
8309 	ring->pipe = 0; /* first pipe */
8310 	ring->queue = 0; /* first queue */
8311 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8312 
8313 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8314 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8315 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8316 			     nop);
8317 	if (r)
8318 		return r;
8319 	/* dGPUs only have 1 MEC */
8320 	ring->me = 1; /* first MEC */
8321 	ring->pipe = 0; /* first pipe */
8322 	ring->queue = 1; /* second queue */
8323 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8324 
8325 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8326 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8327 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8328 	if (r)
8329 		return r;
8330 
8331 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8332 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8333 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8334 	if (r)
8335 		return r;
8336 
8337 	r = cik_cp_resume(rdev);
8338 	if (r)
8339 		return r;
8340 
8341 	r = cik_sdma_resume(rdev);
8342 	if (r)
8343 		return r;
8344 
8345 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8346 	if (ring->ring_size) {
8347 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8348 				     RADEON_CP_PACKET2);
8349 		if (!r)
8350 			r = uvd_v1_0_init(rdev);
8351 		if (r)
8352 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8353 	}
8354 
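	/* default to -ENOENT so the VCE error path below stays quiet when
	 * no VCE rings were brought up */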
8355 	r = -ENOENT;
8356 
8357 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8358 	if (ring->ring_size)
8359 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8360 				     VCE_CMD_NO_OP);
8361 
8362 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8363 	if (ring->ring_size)
8364 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8365 				     VCE_CMD_NO_OP);
8366 
8367 	if (!r)
8368 		r = vce_v1_0_init(rdev);
8369 	else if (r != -ENOENT)
8370 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8371 
8372 	r = radeon_ib_pool_init(rdev);
8373 	if (r) {
8374 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8375 		return r;
8376 	}
8377 
8378 	r = radeon_vm_manager_init(rdev);
8379 	if (r) {
8380 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8381 		return r;
8382 	}
8383 
8384 	r = radeon_audio_init(rdev);
8385 	if (r)
8386 		return r;
8387 
8388 	r = radeon_kfd_resume(rdev);
8389 	if (r)
8390 		return r;
8391 
8392 	return 0;
8393 }
8394 
8395 /**
8396  * cik_resume - resume the asic to a functional state
8397  *
8398  * @rdev: radeon_device pointer
8399  *
8400  * Programs the asic to a functional state (CIK).
8401  * Called at resume.
8402  * Returns 0 for success, error for failure.
8403  */
8404 int cik_resume(struct radeon_device *rdev)
8405 {
8406 	int r;
8407 
8408 	/* post card */
8409 	atom_asic_init(rdev->mode_info.atom_context);
8410 
8411 	/* init golden registers */
8412 	cik_init_golden_registers(rdev);
8413 
8414 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8415 		radeon_pm_resume(rdev);
8416 
8417 	rdev->accel_working = true;
8418 	r = cik_startup(rdev);
8419 	if (r) {
8420 		DRM_ERROR("cik startup failed on resume\n");
8421 		rdev->accel_working = false;
8422 		return r;
8423 	}
8424 
8425 	return r;
8426 
8427 }
8428 
8429 /**
8430  * cik_suspend - suspend the asic
8431  *
8432  * @rdev: radeon_device pointer
8433  *
8434  * Bring the chip into a state suitable for suspend (CIK).
8435  * Called at suspend.
8436  * Returns 0 for success.
8437  */
8438 int cik_suspend(struct radeon_device *rdev)
8439 {
8440 	radeon_kfd_suspend(rdev);
8441 	radeon_pm_suspend(rdev);
8442 	radeon_audio_fini(rdev);
8443 	radeon_vm_manager_fini(rdev);
8444 	cik_cp_enable(rdev, false);
8445 	cik_sdma_enable(rdev, false);
8446 	uvd_v1_0_fini(rdev);
8447 	radeon_uvd_suspend(rdev);
8448 	radeon_vce_suspend(rdev);
8449 	cik_fini_pg(rdev);
8450 	cik_fini_cg(rdev);
8451 	cik_irq_suspend(rdev);
8452 	radeon_wb_disable(rdev);
8453 	cik_pcie_gart_disable(rdev);
8454 	return 0;
8455 }
8456 
8457 /* The plan is to move initialization into this function and use
8458  * helper functions so that radeon_device_init does pretty much
8459  * nothing more than call asic specific functions. This should
8460  * also allow us to remove a bunch of callback functions like
8461  * vram_info.
8462  */
8463 /**
8464  * cik_init - asic specific driver and hw init
8465  *
8466  * @rdev: radeon_device pointer
8467  *
8468  * Setup asic specific driver variables and program the hw
8469  * to a functional state (CIK).
8470  * Called at driver startup.
8471  * Returns 0 for success, errors for failure.
8472  */
8473 int cik_init(struct radeon_device *rdev)
8474 {
8475 	struct radeon_ring *ring;
8476 	int r;
8477 
8478 	/* Read BIOS */
8479 	if (!radeon_get_bios(rdev)) {
8480 		if (ASIC_IS_AVIVO(rdev))
8481 			return -EINVAL;
8482 	}
8483 	/* Must be an ATOMBIOS */
8484 	if (!rdev->is_atom_bios) {
8485 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8486 		return -EINVAL;
8487 	}
8488 	r = radeon_atombios_init(rdev);
8489 	if (r)
8490 		return r;
8491 
8492 	/* Post card if necessary */
8493 	if (!radeon_card_posted(rdev)) {
8494 		if (!rdev->bios) {
8495 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8496 			return -EINVAL;
8497 		}
8498 		DRM_INFO("GPU not posted. posting now...\n");
8499 		atom_asic_init(rdev->mode_info.atom_context);
8500 	}
8501 	/* init golden registers */
8502 	cik_init_golden_registers(rdev);
8503 	/* Initialize scratch registers */
8504 	cik_scratch_init(rdev);
8505 	/* Initialize surface registers */
8506 	radeon_surface_init(rdev);
8507 	/* Initialize clocks */
8508 	radeon_get_clock_info(rdev->ddev);
8509 
8510 	/* Fence driver */
8511 	r = radeon_fence_driver_init(rdev);
8512 	if (r)
8513 		return r;
8514 
8515 	/* initialize memory controller */
8516 	r = cik_mc_init(rdev);
8517 	if (r)
8518 		return r;
8519 	/* Memory manager */
8520 	r = radeon_bo_init(rdev);
8521 	if (r)
8522 		return r;
8523 
8524 	if (rdev->flags & RADEON_IS_IGP) {
8525 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8526 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8527 			r = cik_init_microcode(rdev);
8528 			if (r) {
8529 				DRM_ERROR("Failed to load firmware!\n");
8530 				return r;
8531 			}
8532 		}
8533 	} else {
8534 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8535 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8536 		    !rdev->mc_fw) {
8537 			r = cik_init_microcode(rdev);
8538 			if (r) {
8539 				DRM_ERROR("Failed to load firmware!\n");
8540 				return r;
8541 			}
8542 		}
8543 	}
8544 
8545 	/* Initialize power management */
8546 	radeon_pm_init(rdev);
8547 
8548 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8549 	ring->ring_obj = NULL;
8550 	r600_ring_init(rdev, ring, 1024 * 1024);
8551 
8552 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8553 	ring->ring_obj = NULL;
8554 	r600_ring_init(rdev, ring, 1024 * 1024);
8555 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8556 	if (r)
8557 		return r;
8558 
8559 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8560 	ring->ring_obj = NULL;
8561 	r600_ring_init(rdev, ring, 1024 * 1024);
8562 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8563 	if (r)
8564 		return r;
8565 
8566 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8567 	ring->ring_obj = NULL;
8568 	r600_ring_init(rdev, ring, 256 * 1024);
8569 
8570 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8571 	ring->ring_obj = NULL;
8572 	r600_ring_init(rdev, ring, 256 * 1024);
8573 
8574 	r = radeon_uvd_init(rdev);
8575 	if (!r) {
8576 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8577 		ring->ring_obj = NULL;
8578 		r600_ring_init(rdev, ring, 4096);
8579 	}
8580 
8581 	r = radeon_vce_init(rdev);
8582 	if (!r) {
8583 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8584 		ring->ring_obj = NULL;
8585 		r600_ring_init(rdev, ring, 4096);
8586 
8587 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8588 		ring->ring_obj = NULL;
8589 		r600_ring_init(rdev, ring, 4096);
8590 	}
8591 
8592 	rdev->ih.ring_obj = NULL;
8593 	r600_ih_ring_init(rdev, 64 * 1024);
8594 
8595 	r = r600_pcie_gart_init(rdev);
8596 	if (r)
8597 		return r;
8598 
8599 	rdev->accel_working = true;
8600 	r = cik_startup(rdev);
8601 	if (r) {
8602 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8603 		cik_cp_fini(rdev);
8604 		cik_sdma_fini(rdev);
8605 		cik_irq_fini(rdev);
8606 		sumo_rlc_fini(rdev);
8607 		cik_mec_fini(rdev);
8608 		radeon_wb_fini(rdev);
8609 		radeon_ib_pool_fini(rdev);
8610 		radeon_vm_manager_fini(rdev);
8611 		radeon_irq_kms_fini(rdev);
8612 		cik_pcie_gart_fini(rdev);
8613 		rdev->accel_working = false;
8614 	}
8615 
8616 	/* Don't start up if the MC ucode is missing.
8617 	 * The default clocks and voltages before the MC ucode
8618 	 * is loaded are not sufficient for advanced operations.
8619 	 */
8620 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8621 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8622 		return -EINVAL;
8623 	}
8624 
8625 	return 0;
8626 }
8627 
8628 /**
8629  * cik_fini - asic specific driver and hw fini
8630  *
8631  * @rdev: radeon_device pointer
8632  *
8633  * Tear down the asic specific driver variables and program the hw
8634  * to an idle state (CIK).
8635  * Called at driver unload.
8636  */
8637 void cik_fini(struct radeon_device *rdev)
8638 {
8639 	radeon_pm_fini(rdev);
8640 	cik_cp_fini(rdev);
8641 	cik_sdma_fini(rdev);
8642 	cik_fini_pg(rdev);
8643 	cik_fini_cg(rdev);
8644 	cik_irq_fini(rdev);
8645 	sumo_rlc_fini(rdev);
8646 	cik_mec_fini(rdev);
8647 	radeon_wb_fini(rdev);
8648 	radeon_vm_manager_fini(rdev);
8649 	radeon_ib_pool_fini(rdev);
8650 	radeon_irq_kms_fini(rdev);
8651 	uvd_v1_0_fini(rdev);
8652 	radeon_uvd_fini(rdev);
8653 	radeon_vce_fini(rdev);
8654 	cik_pcie_gart_fini(rdev);
8655 	r600_vram_scratch_fini(rdev);
8656 	radeon_gem_fini(rdev);
8657 	radeon_fence_driver_fini(rdev);
8658 	radeon_bo_fini(rdev);
8659 	radeon_atombios_fini(rdev);
8660 	kfree(rdev->bios);
8661 	rdev->bios = NULL;
8662 }
8663 
8664 void dce8_program_fmt(struct drm_encoder *encoder)
8665 {
8666 	struct drm_device *dev = encoder->dev;
8667 	struct radeon_device *rdev = dev->dev_private;
8668 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8669 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8670 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8671 	int bpc = 0;
8672 	u32 tmp = 0;
8673 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8674 
8675 	if (connector) {
8676 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8677 		bpc = radeon_get_monitor_bpc(connector);
8678 		dither = radeon_connector->dither;
8679 	}
8680 
8681 	/* LVDS/eDP FMT is set up by atom */
8682 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8683 		return;
8684 
8685 	/* not needed for analog */
8686 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8687 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8688 		return;
8689 
8690 	if (bpc == 0)
8691 		return;
8692 
8693 	switch (bpc) {
8694 	case 6:
8695 		if (dither == RADEON_FMT_DITHER_ENABLE)
8696 			/* XXX sort out optimal dither settings */
8697 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8698 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8699 		else
8700 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8701 		break;
8702 	case 8:
8703 		if (dither == RADEON_FMT_DITHER_ENABLE)
8704 			/* XXX sort out optimal dither settings */
8705 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8706 				FMT_RGB_RANDOM_ENABLE |
8707 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8708 		else
8709 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8710 		break;
8711 	case 10:
8712 		if (dither == RADEON_FMT_DITHER_ENABLE)
8713 			/* XXX sort out optimal dither settings */
8714 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8715 				FMT_RGB_RANDOM_ENABLE |
8716 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8717 		else
8718 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8719 		break;
8720 	default:
8721 		/* not needed */
8722 		break;
8723 	}
8724 
8725 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8726 }
8727 
8728 /* display watermark setup */
8729 /**
8730  * dce8_line_buffer_adjust - Set up the line buffer
8731  *
8732  * @rdev: radeon_device pointer
8733  * @radeon_crtc: the selected display controller
8734  * @mode: the current display mode on the selected display
8735  * controller
8736  *
8737  * Set up the line buffer allocation for
8738  * the selected display controller (CIK).
8739  * Returns the line buffer size in pixels.
8740  */
8741 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8742 				   struct radeon_crtc *radeon_crtc,
8743 				   struct drm_display_mode *mode)
8744 {
8745 	u32 tmp, buffer_alloc, i;
8746 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8747 	/*
8748 	 * Line Buffer Setup
8749 	 * There are 6 line buffers, one for each display controller.
8750 	 * There are 3 partitions per LB. Select the number of partitions
8751 	 * to enable based on the display width.  For display widths larger
8752 	 * than 4096, you need to use 2 display controllers and combine
8753 	 * them using the stereo blender.
8754 	 */
8755 	if (radeon_crtc->base.enabled && mode) {
8756 		if (mode->crtc_hdisplay < 1920) {
8757 			tmp = 1;
8758 			buffer_alloc = 2;
8759 		} else if (mode->crtc_hdisplay < 2560) {
8760 			tmp = 2;
8761 			buffer_alloc = 2;
8762 		} else if (mode->crtc_hdisplay < 4096) {
8763 			tmp = 0;
8764 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8765 		} else {
8766 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8767 			tmp = 0;
8768 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8769 		}
8770 	} else {
8771 		tmp = 1;
8772 		buffer_alloc = 0;
8773 	}
8774 
8775 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8776 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8777 
8778 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8779 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8780 	for (i = 0; i < rdev->usec_timeout; i++) {
8781 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8782 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8783 			break;
8784 		udelay(1);
8785 	}
8786 
8787 	if (radeon_crtc->base.enabled && mode) {
8788 		switch (tmp) {
8789 		case 0:
8790 		default:
8791 			return 4096 * 2;
8792 		case 1:
8793 			return 1920 * 2;
8794 		case 2:
8795 			return 2560 * 2;
8796 		}
8797 	}
8798 
8799 	/* controller not enabled, so no lb used */
8800 	return 0;
8801 }
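
/* Worked example (illustrative, not from any hw doc): a 1920-wide mode is
 * not < 1920 but is < 2560, so tmp = 2 and buffer_alloc = 2 above, and the
 * function reports 2560 * 2 = 5120 pixels of line buffer for that pipe.
 */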
8802 
8803 /**
8804  * cik_get_number_of_dram_channels - get the number of dram channels
8805  *
8806  * @rdev: radeon_device pointer
8807  *
8808  * Look up the number of video ram channels (CIK).
8809  * Used for display watermark bandwidth calculations
8810  * Returns the number of dram channels
8811  */
8812 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8813 {
8814 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8815 
8816 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8817 	case 0:
8818 	default:
8819 		return 1;
8820 	case 1:
8821 		return 2;
8822 	case 2:
8823 		return 4;
8824 	case 3:
8825 		return 8;
8826 	case 4:
8827 		return 3;
8828 	case 5:
8829 		return 6;
8830 	case 6:
8831 		return 10;
8832 	case 7:
8833 		return 12;
8834 	case 8:
8835 		return 16;
8836 	}
8837 }
8838 
8839 struct dce8_wm_params {
8840 	u32 dram_channels; /* number of dram channels */
8841 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8842 	u32 sclk;          /* engine clock in kHz */
8843 	u32 disp_clk;      /* display clock in kHz */
8844 	u32 src_width;     /* viewport width */
8845 	u32 active_time;   /* active display time in ns */
8846 	u32 blank_time;    /* blank time in ns */
8847 	bool interlaced;   /* mode is interlaced */
8848 	fixed20_12 vsc;    /* vertical scale ratio */
8849 	u32 num_heads;     /* number of active crtcs */
8850 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8851 	u32 lb_size;       /* line buffer allocated to pipe */
8852 	u32 vtaps;         /* vertical scaler taps */
8853 };
8854 
8855 /**
8856  * dce8_dram_bandwidth - get the dram bandwidth
8857  *
8858  * @wm: watermark calculation data
8859  *
8860  * Calculate the raw dram bandwidth (CIK).
8861  * Used for display watermark bandwidth calculations
8862  * Returns the dram bandwidth in MBytes/s
8863  */
8864 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8865 {
8866 	/* Calculate raw DRAM Bandwidth */
8867 	fixed20_12 dram_efficiency; /* 0.7 */
8868 	fixed20_12 yclk, dram_channels, bandwidth;
8869 	fixed20_12 a;
8870 
8871 	a.full = dfixed_const(1000);
8872 	yclk.full = dfixed_const(wm->yclk);
8873 	yclk.full = dfixed_div(yclk, a);
8874 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8875 	a.full = dfixed_const(10);
8876 	dram_efficiency.full = dfixed_const(7);
8877 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8878 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8879 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8880 
8881 	return dfixed_trunc(bandwidth);
8882 }
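
/* Worked example (illustrative numbers): wm->yclk = 1000000 (1 GHz
 * effective per pin) and 4 dram channels give
 * (4 * 4) * (1000000 / 1000) * 0.7 = 11200 MBytes/s of raw dram bandwidth.
 */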
8883 
8884 /**
8885  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8886  *
8887  * @wm: watermark calculation data
8888  *
8889  * Calculate the dram bandwidth used for display (CIK).
8890  * Used for display watermark bandwidth calculations
8891  * Returns the dram bandwidth for display in MBytes/s
8892  */
8893 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8894 {
8895 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8896 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8897 	fixed20_12 yclk, dram_channels, bandwidth;
8898 	fixed20_12 a;
8899 
8900 	a.full = dfixed_const(1000);
8901 	yclk.full = dfixed_const(wm->yclk);
8902 	yclk.full = dfixed_div(yclk, a);
8903 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8904 	a.full = dfixed_const(10);
8905 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8906 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8907 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8908 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8909 
8910 	return dfixed_trunc(bandwidth);
8911 }
8912 
8913 /**
8914  * dce8_data_return_bandwidth - get the data return bandwidth
8915  *
8916  * @wm: watermark calculation data
8917  *
8918  * Calculate the data return bandwidth used for display (CIK).
8919  * Used for display watermark bandwidth calculations
8920  * Returns the data return bandwidth in MBytes/s
8921  */
8922 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8923 {
8924 	/* Calculate the display Data return Bandwidth */
8925 	fixed20_12 return_efficiency; /* 0.8 */
8926 	fixed20_12 sclk, bandwidth;
8927 	fixed20_12 a;
8928 
8929 	a.full = dfixed_const(1000);
8930 	sclk.full = dfixed_const(wm->sclk);
8931 	sclk.full = dfixed_div(sclk, a);
8932 	a.full = dfixed_const(10);
8933 	return_efficiency.full = dfixed_const(8);
8934 	return_efficiency.full = dfixed_div(return_efficiency, a);
8935 	a.full = dfixed_const(32);
8936 	bandwidth.full = dfixed_mul(a, sclk);
8937 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8938 
8939 	return dfixed_trunc(bandwidth);
8940 }
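
/* Worked example (illustrative numbers): wm->sclk = 800000 (an 800 MHz
 * engine clock) gives 32 * (800000 / 1000) * 0.8 = 20480 MBytes/s of
 * display data return bandwidth.
 */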
8941 
8942 /**
8943  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8944  *
8945  * @wm: watermark calculation data
8946  *
8947  * Calculate the dmif bandwidth used for display (CIK).
8948  * Used for display watermark bandwidth calculations
8949  * Returns the dmif bandwidth in MBytes/s
8950  */
8951 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8952 {
8953 	/* Calculate the DMIF Request Bandwidth */
8954 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8955 	fixed20_12 disp_clk, bandwidth;
8956 	fixed20_12 a, b;
8957 
8958 	a.full = dfixed_const(1000);
8959 	disp_clk.full = dfixed_const(wm->disp_clk);
8960 	disp_clk.full = dfixed_div(disp_clk, a);
8961 	a.full = dfixed_const(32);
8962 	b.full = dfixed_mul(a, disp_clk);
8963 
8964 	a.full = dfixed_const(10);
8965 	disp_clk_request_efficiency.full = dfixed_const(8);
8966 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8967 
8968 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8969 
8970 	return dfixed_trunc(bandwidth);
8971 }
8972 
8973 /**
8974  * dce8_available_bandwidth - get the min available bandwidth
8975  *
8976  * @wm: watermark calculation data
8977  *
8978  * Calculate the min available bandwidth used for display (CIK).
8979  * Used for display watermark bandwidth calculations
8980  * Returns the min available bandwidth in MBytes/s
8981  */
8982 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8983 {
8984 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8985 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8986 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8987 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8988 
8989 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8990 }
8991 
8992 /**
8993  * dce8_average_bandwidth - get the average available bandwidth
8994  *
8995  * @wm: watermark calculation data
8996  *
8997  * Calculate the average available bandwidth used for display (CIK).
8998  * Used for display watermark bandwidth calculations
8999  * Returns the average available bandwidth in MBytes/s
9000  */
9001 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9002 {
9003 	/* Calculate the display mode Average Bandwidth
9004 	 * DisplayMode should contain the source and destination dimensions,
9005 	 * timing, etc.
9006 	 */
9007 	fixed20_12 bpp;
9008 	fixed20_12 line_time;
9009 	fixed20_12 src_width;
9010 	fixed20_12 bandwidth;
9011 	fixed20_12 a;
9012 
9013 	a.full = dfixed_const(1000);
9014 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9015 	line_time.full = dfixed_div(line_time, a);
9016 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9017 	src_width.full = dfixed_const(wm->src_width);
9018 	bandwidth.full = dfixed_mul(src_width, bpp);
9019 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9020 	bandwidth.full = dfixed_div(bandwidth, line_time);
9021 
9022 	return dfixed_trunc(bandwidth);
9023 }
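
/* Worked example (illustrative numbers): 1920 source pixels at 4 bytes per
 * pixel with vsc = 1.0 and a 16000 ns line time average out to
 * 1920 * 4 / (16000 / 1000) = 480 MBytes/s for this head.
 */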
9024 
9025 /**
9026  * dce8_latency_watermark - get the latency watermark
9027  *
9028  * @wm: watermark calculation data
9029  *
9030  * Calculate the latency watermark (CIK).
9031  * Used for display watermark bandwidth calculations
9032  * Returns the latency watermark in ns
9033  */
9034 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9035 {
9036 	/* First calculate the latency in ns */
9037 	u32 mc_latency = 2000; /* 2000 ns. */
9038 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9039 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9040 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9041 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9042 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9043 		(wm->num_heads * cursor_line_pair_return_time);
9044 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9045 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9046 	u32 tmp, dmif_size = 12288;
9047 	fixed20_12 a, b, c;
9048 
9049 	if (wm->num_heads == 0)
9050 		return 0;
9051 
9052 	a.full = dfixed_const(2);
9053 	b.full = dfixed_const(1);
9054 	if ((wm->vsc.full > a.full) ||
9055 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9056 	    (wm->vtaps >= 5) ||
9057 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9058 		max_src_lines_per_dst_line = 4;
9059 	else
9060 		max_src_lines_per_dst_line = 2;
9061 
9062 	a.full = dfixed_const(available_bandwidth);
9063 	b.full = dfixed_const(wm->num_heads);
9064 	a.full = dfixed_div(a, b);
9065 
9066 	b.full = dfixed_const(mc_latency + 512);
9067 	c.full = dfixed_const(wm->disp_clk);
9068 	b.full = dfixed_div(b, c);
9069 
9070 	c.full = dfixed_const(dmif_size);
9071 	b.full = dfixed_div(c, b);
9072 
9073 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9074 
9075 	b.full = dfixed_const(1000);
9076 	c.full = dfixed_const(wm->disp_clk);
9077 	b.full = dfixed_div(c, b);
9078 	c.full = dfixed_const(wm->bytes_per_pixel);
9079 	b.full = dfixed_mul(b, c);
9080 
9081 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9082 
9083 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9084 	b.full = dfixed_const(1000);
9085 	c.full = dfixed_const(lb_fill_bw);
9086 	b.full = dfixed_div(c, b);
9087 	a.full = dfixed_div(a, b);
9088 	line_fill_time = dfixed_trunc(a);
9089 
9090 	if (line_fill_time < wm->active_time)
9091 		return latency;
9092 	else
9093 		return latency + (line_fill_time - wm->active_time);
9094 
9095 }
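
/* In short: the watermark is the mc latency plus the time the other heads
 * can occupy the return path, padded by any shortfall when lb_fill_bw is
 * too low to refill a line within the active display period.
 */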
9096 
9097 /**
9098  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9099  * average and available dram bandwidth
9100  *
9101  * @wm: watermark calculation data
9102  *
9103  * Check if the display average bandwidth fits in the display
9104  * dram bandwidth (CIK).
9105  * Used for display watermark bandwidth calculations
9106  * Returns true if the display fits, false if not.
9107  */
9108 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9109 {
9110 	if (dce8_average_bandwidth(wm) <=
9111 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9112 		return true;
9113 	else
9114 		return false;
9115 }
9116 
9117 /**
9118  * dce8_average_bandwidth_vs_available_bandwidth - check
9119  * average and available bandwidth
9120  *
9121  * @wm: watermark calculation data
9122  *
9123  * Check if the display average bandwidth fits in the display
9124  * available bandwidth (CIK).
9125  * Used for display watermark bandwidth calculations
9126  * Returns true if the display fits, false if not.
9127  */
9128 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9129 {
9130 	if (dce8_average_bandwidth(wm) <=
9131 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9132 		return true;
9133 	else
9134 		return false;
9135 }
9136 
9137 /**
9138  * dce8_check_latency_hiding - check latency hiding
9139  *
9140  * @wm: watermark calculation data
9141  *
9142  * Check latency hiding (CIK).
9143  * Used for display watermark bandwidth calculations
9144  * Returns true if the display fits, false if not.
9145  */
9146 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9147 {
9148 	u32 lb_partitions = wm->lb_size / wm->src_width;
9149 	u32 line_time = wm->active_time + wm->blank_time;
9150 	u32 latency_tolerant_lines;
9151 	u32 latency_hiding;
9152 	fixed20_12 a;
9153 
9154 	a.full = dfixed_const(1);
9155 	if (wm->vsc.full > a.full)
9156 		latency_tolerant_lines = 1;
9157 	else {
9158 		if (lb_partitions <= (wm->vtaps + 1))
9159 			latency_tolerant_lines = 1;
9160 		else
9161 			latency_tolerant_lines = 2;
9162 	}
9163 
9164 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9165 
9166 	if (dce8_latency_watermark(wm) <= latency_hiding)
9167 		return true;
9168 	else
9169 		return false;
9170 }
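
/* Worked example (illustrative numbers): lb_size = 5120 and
 * src_width = 1920 give lb_partitions = 2; with vsc <= 1 and vtaps = 1
 * that is <= vtaps + 1, so only one latency tolerant line is assumed and
 * latency_hiding = line_time + blank_time.
 */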
9171 
9172 /**
9173  * dce8_program_watermarks - program display watermarks
9174  *
9175  * @rdev: radeon_device pointer
9176  * @radeon_crtc: the selected display controller
9177  * @lb_size: line buffer size
9178  * @num_heads: number of display controllers in use
9179  *
9180  * Calculate and program the display watermarks for the
9181  * selected display controller (CIK).
9182  */
9183 static void dce8_program_watermarks(struct radeon_device *rdev,
9184 				    struct radeon_crtc *radeon_crtc,
9185 				    u32 lb_size, u32 num_heads)
9186 {
9187 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9188 	struct dce8_wm_params wm_low, wm_high;
9189 	u32 pixel_period;
9190 	u32 line_time = 0;
9191 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9192 	u32 tmp, wm_mask;
9193 
9194 	if (radeon_crtc->base.enabled && num_heads && mode) {
9195 		pixel_period = 1000000 / (u32)mode->clock;
9196 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9197 
9198 		/* watermark for high clocks */
9199 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9200 		    rdev->pm.dpm_enabled) {
9201 			wm_high.yclk =
9202 				radeon_dpm_get_mclk(rdev, false) * 10;
9203 			wm_high.sclk =
9204 				radeon_dpm_get_sclk(rdev, false) * 10;
9205 		} else {
9206 			wm_high.yclk = rdev->pm.current_mclk * 10;
9207 			wm_high.sclk = rdev->pm.current_sclk * 10;
9208 		}
9209 
9210 		wm_high.disp_clk = mode->clock;
9211 		wm_high.src_width = mode->crtc_hdisplay;
9212 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9213 		wm_high.blank_time = line_time - wm_high.active_time;
9214 		wm_high.interlaced = false;
9215 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9216 			wm_high.interlaced = true;
9217 		wm_high.vsc = radeon_crtc->vsc;
9218 		wm_high.vtaps = 1;
9219 		if (radeon_crtc->rmx_type != RMX_OFF)
9220 			wm_high.vtaps = 2;
9221 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9222 		wm_high.lb_size = lb_size;
9223 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9224 		wm_high.num_heads = num_heads;
9225 
9226 		/* set for high clocks */
9227 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9228 
9229 		/* possibly force display priority to high */
9230 		/* should really do this at mode validation time... */
9231 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9232 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9233 		    !dce8_check_latency_hiding(&wm_high) ||
9234 		    (rdev->disp_priority == 2)) {
9235 			DRM_DEBUG_KMS("force priority to high\n");
9236 		}
9237 
9238 		/* watermark for low clocks */
9239 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9240 		    rdev->pm.dpm_enabled) {
9241 			wm_low.yclk =
9242 				radeon_dpm_get_mclk(rdev, true) * 10;
9243 			wm_low.sclk =
9244 				radeon_dpm_get_sclk(rdev, true) * 10;
9245 		} else {
9246 			wm_low.yclk = rdev->pm.current_mclk * 10;
9247 			wm_low.sclk = rdev->pm.current_sclk * 10;
9248 		}
9249 
9250 		wm_low.disp_clk = mode->clock;
9251 		wm_low.src_width = mode->crtc_hdisplay;
9252 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9253 		wm_low.blank_time = line_time - wm_low.active_time;
9254 		wm_low.interlaced = false;
9255 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9256 			wm_low.interlaced = true;
9257 		wm_low.vsc = radeon_crtc->vsc;
9258 		wm_low.vtaps = 1;
9259 		if (radeon_crtc->rmx_type != RMX_OFF)
9260 			wm_low.vtaps = 2;
9261 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9262 		wm_low.lb_size = lb_size;
9263 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9264 		wm_low.num_heads = num_heads;
9265 
9266 		/* set for low clocks */
9267 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9268 
9269 		/* possibly force display priority to high */
9270 		/* should really do this at mode validation time... */
9271 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9272 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9273 		    !dce8_check_latency_hiding(&wm_low) ||
9274 		    (rdev->disp_priority == 2)) {
9275 			DRM_DEBUG_KMS("force priority to high\n");
9276 		}
9277 
9278 		/* Save number of lines the linebuffer leads before the scanout */
9279 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9280 	}
9281 
9282 	/* select wm A */
9283 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9284 	tmp = wm_mask;
9285 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9286 	tmp |= LATENCY_WATERMARK_MASK(1);
9287 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9288 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9289 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9290 		LATENCY_HIGH_WATERMARK(line_time)));
9291 	/* select wm B */
9292 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9293 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9294 	tmp |= LATENCY_WATERMARK_MASK(2);
9295 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9296 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9297 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9298 		LATENCY_HIGH_WATERMARK(line_time)));
9299 	/* restore original selection */
9300 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9301 
9302 	/* save values for DPM */
9303 	radeon_crtc->line_time = line_time;
9304 	radeon_crtc->wm_high = latency_watermark_a;
9305 	radeon_crtc->wm_low = latency_watermark_b;
9306 }
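
/* Watermark set A is programmed against the high clock numbers and set B
 * against the low clock numbers; the DPG mask register selects which set a
 * DPG_PIPE_LATENCY_CONTROL write lands in, and the saved line_time/wm
 * values above are what the DPM code consumes when switching levels.
 */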
9307 
9308 /**
9309  * dce8_bandwidth_update - program display watermarks
9310  *
9311  * @rdev: radeon_device pointer
9312  *
9313  * Calculate and program the display watermarks and line
9314  * buffer allocation (CIK).
9315  */
9316 void dce8_bandwidth_update(struct radeon_device *rdev)
9317 {
9318 	struct drm_display_mode *mode = NULL;
9319 	u32 num_heads = 0, lb_size;
9320 	int i;
9321 
9322 	if (!rdev->mode_info.mode_config_initialized)
9323 		return;
9324 
9325 	radeon_update_display_priority(rdev);
9326 
9327 	for (i = 0; i < rdev->num_crtc; i++) {
9328 		if (rdev->mode_info.crtcs[i]->base.enabled)
9329 			num_heads++;
9330 	}
9331 	for (i = 0; i < rdev->num_crtc; i++) {
9332 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9333 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9334 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9335 	}
9336 }
9337 
9338 /**
9339  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9340  *
9341  * @rdev: radeon_device pointer
9342  *
9343  * Fetches a GPU clock counter snapshot (CIK).
9344  * Returns the 64 bit clock counter snapshot.
9345  */
9346 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9347 {
9348 	uint64_t clock;
9349 
9350 	mutex_lock(&rdev->gpu_clock_mutex);
9351 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9352 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9353 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9354 	mutex_unlock(&rdev->gpu_clock_mutex);
9355 	return clock;
9356 }
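
/* The RLC_CAPTURE_GPU_CLOCK_COUNT write latches the free-running counter,
 * so the LSB/MSB halves read back as one consistent 64-bit snapshot; the
 * mutex keeps concurrent callers from racing on the shared latch.
 */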
9357 
9358 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9359 			     u32 cntl_reg, u32 status_reg)
9360 {
9361 	int r, i;
9362 	struct atom_clock_dividers dividers;
9363 	uint32_t tmp;
9364 
9365 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9366 					   clock, false, &dividers);
9367 	if (r)
9368 		return r;
9369 
9370 	tmp = RREG32_SMC(cntl_reg);
9371 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9372 	tmp |= dividers.post_divider;
9373 	WREG32_SMC(cntl_reg, tmp);
9374 
9375 	for (i = 0; i < 100; i++) {
9376 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9377 			break;
9378 		mdelay(10);
9379 	}
9380 	if (i == 100)
9381 		return -ETIMEDOUT;
9382 
9383 	return 0;
9384 }
9385 
9386 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9387 {
9388 	int r = 0;
9389 
9390 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9391 	if (r)
9392 		return r;
9393 
9394 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9395 	return r;
9396 }
9397 
9398 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9399 {
9400 	int r, i;
9401 	struct atom_clock_dividers dividers;
9402 	u32 tmp;
9403 
9404 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9405 					   ecclk, false, &dividers);
9406 	if (r)
9407 		return r;
9408 
9409 	for (i = 0; i < 100; i++) {
9410 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9411 			break;
9412 		mdelay(10);
9413 	}
9414 	if (i == 100)
9415 		return -ETIMEDOUT;
9416 
9417 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9418 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9419 	tmp |= dividers.post_divider;
9420 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9421 
9422 	for (i = 0; i < 100; i++) {
9423 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9424 			break;
9425 		mdelay(10);
9426 	}
9427 	if (i == 100)
9428 		return -ETIMEDOUT;
9429 
9430 	return 0;
9431 }
9432 
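/* Attempt to bring the PCIE link up to gen2/gen3 speeds: redo gen3
 * equalization against the root port if needed, program the target link
 * speed in LNKCTL2, then kick a software speed change and poll until the
 * hardware clears the initiate bit.
 */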
9433 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9434 {
9435 	struct pci_dev *root = rdev->pdev->bus->self;
9436 	int bridge_pos, gpu_pos;
9437 	u32 speed_cntl, mask, current_data_rate;
9438 	int ret, i;
9439 	u16 tmp16;
9440 
9441 	if (pci_is_root_bus(rdev->pdev->bus))
9442 		return;
9443 
9444 	if (radeon_pcie_gen2 == 0)
9445 		return;
9446 
9447 	if (rdev->flags & RADEON_IS_IGP)
9448 		return;
9449 
9450 	if (!(rdev->flags & RADEON_IS_PCIE))
9451 		return;
9452 
9453 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9454 	if (ret != 0)
9455 		return;
9456 
9457 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9458 		return;
9459 
9460 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9461 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9462 		LC_CURRENT_DATA_RATE_SHIFT;
9463 	if (mask & DRM_PCIE_SPEED_80) {
9464 		if (current_data_rate == 2) {
9465 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9466 			return;
9467 		}
9468 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9469 	} else if (mask & DRM_PCIE_SPEED_50) {
9470 		if (current_data_rate == 1) {
9471 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9472 			return;
9473 		}
9474 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9475 	}
9476 
9477 	bridge_pos = pci_pcie_cap(root);
9478 	if (!bridge_pos)
9479 		return;
9480 
9481 	gpu_pos = pci_pcie_cap(rdev->pdev);
9482 	if (!gpu_pos)
9483 		return;
9484 
9485 	if (mask & DRM_PCIE_SPEED_80) {
9486 		/* re-try equalization if gen3 is not already enabled */
9487 		if (current_data_rate != 2) {
9488 			u16 bridge_cfg, gpu_cfg;
9489 			u16 bridge_cfg2, gpu_cfg2;
9490 			u32 max_lw, current_lw, tmp;
9491 
9492 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9493 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9494 
9495 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9496 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9497 
9498 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9499 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9500 
9501 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9502 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9503 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9504 
9505 			if (current_lw < max_lw) {
9506 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9507 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9508 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9509 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9510 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9511 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9512 				}
9513 			}
9514 
9515 			for (i = 0; i < 10; i++) {
9516 				/* check status */
9517 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9518 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9519 					break;
9520 
9521 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9522 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9523 
9524 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9525 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9526 
9527 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9528 				tmp |= LC_SET_QUIESCE;
9529 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9530 
9531 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9532 				tmp |= LC_REDO_EQ;
9533 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9534 
9535 				mdelay(100);
9536 
9537 				/* linkctl */
9538 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9539 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9540 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9541 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9542 
9543 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9544 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9545 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9546 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9547 
9548 				/* linkctl2 */
9549 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9550 				tmp16 &= ~((1 << 4) | (7 << 9));
9551 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9552 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9553 
9554 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9555 				tmp16 &= ~((1 << 4) | (7 << 9));
9556 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9557 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9558 
9559 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9560 				tmp &= ~LC_SET_QUIESCE;
9561 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9562 			}
9563 		}
9564 	}
9565 
9566 	/* set the link speed */
9567 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9568 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9569 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9570 
9571 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9572 	tmp16 &= ~0xf;
9573 	if (mask & DRM_PCIE_SPEED_80)
9574 		tmp16 |= 3; /* gen3 */
9575 	else if (mask & DRM_PCIE_SPEED_50)
9576 		tmp16 |= 2; /* gen2 */
9577 	else
9578 		tmp16 |= 1; /* gen1 */
9579 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9580 
9581 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9582 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9583 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9584 
9585 	for (i = 0; i < rdev->usec_timeout; i++) {
9586 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9587 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9588 			break;
9589 		udelay(1);
9590 	}
9591 }
9592 
9593 static void cik_program_aspm(struct radeon_device *rdev)
9594 {
9595 	u32 data, orig;
9596 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9597 	bool disable_clkreq = false;
9598 
9599 	if (radeon_aspm == 0)
9600 		return;
9601 
9602 	/* XXX double check IGPs */
9603 	if (rdev->flags & RADEON_IS_IGP)
9604 		return;
9605 
9606 	if (!(rdev->flags & RADEON_IS_PCIE))
9607 		return;
9608 
9609 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9610 	data &= ~LC_XMIT_N_FTS_MASK;
9611 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9612 	if (orig != data)
9613 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9614 
9615 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9616 	data |= LC_GO_TO_RECOVERY;
9617 	if (orig != data)
9618 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9619 
9620 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9621 	data |= P_IGNORE_EDB_ERR;
9622 	if (orig != data)
9623 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9624 
9625 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9626 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9627 	data |= LC_PMI_TO_L1_DIS;
9628 	if (!disable_l0s)
9629 		data |= LC_L0S_INACTIVITY(7);
9630 
9631 	if (!disable_l1) {
9632 		data |= LC_L1_INACTIVITY(7);
9633 		data &= ~LC_PMI_TO_L1_DIS;
9634 		if (orig != data)
9635 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9636 
9637 		if (!disable_plloff_in_l1) {
9638 			bool clk_req_support;
9639 
9640 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9641 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9642 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9643 			if (orig != data)
9644 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9645 
9646 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9647 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9648 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9649 			if (orig != data)
9650 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9651 
9652 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9653 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9654 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9655 			if (orig != data)
9656 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9657 
9658 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9659 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9660 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9661 			if (orig != data)
9662 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9663 
9664 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9665 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9666 			data |= LC_DYN_LANES_PWR_STATE(3);
9667 			if (orig != data)
9668 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9669 
9670 			if (!disable_clkreq &&
9671 			    !pci_is_root_bus(rdev->pdev->bus)) {
9672 				struct pci_dev *root = rdev->pdev->bus->self;
9673 				u32 lnkcap;
9674 
9675 				clk_req_support = false;
9676 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9677 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9678 					clk_req_support = true;
9679 			} else {
9680 				clk_req_support = false;
9681 			}
9682 
9683 			if (clk_req_support) {
9684 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9685 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9686 				if (orig != data)
9687 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9688 
9689 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9690 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9691 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9692 				if (orig != data)
9693 					WREG32_SMC(THM_CLK_CNTL, data);
9694 
9695 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9696 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9697 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9698 				if (orig != data)
9699 					WREG32_SMC(MISC_CLK_CTRL, data);
9700 
9701 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9702 				data &= ~BCLK_AS_XCLK;
9703 				if (orig != data)
9704 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9705 
9706 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9707 				data &= ~FORCE_BIF_REFCLK_EN;
9708 				if (orig != data)
9709 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9710 
9711 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9712 				data &= ~MPLL_CLKOUT_SEL_MASK;
9713 				data |= MPLL_CLKOUT_SEL(4);
9714 				if (orig != data)
9715 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9716 			}
9717 		}
9718 	} else {
9719 		if (orig != data)
9720 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9721 	}
9722 
9723 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9724 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9725 	if (orig != data)
9726 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9727 
9728 	if (!disable_l0s) {
9729 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9730 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9731 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9732 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9733 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9734 				data &= ~LC_L0S_INACTIVITY_MASK;
9735 				if (orig != data)
9736 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9737 			}
9738 		}
9739 	}
9740 }
9741