/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register.
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

/*
 * Indirect register accessors
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

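	/* CTF_TEMP is a 9-bit field; bit 9 appears to flag an
	 * out-of-range reading, so clamp to the 255 C maximum.
	 */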
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

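/*
 * RLC save/restore register lists.  Entries come in pairs: a control word
 * packing a GRBM_GFX_INDEX selector in the high 16 bits and a register
 * dword offset (byte offset >> 2) in the low 16 bits, followed by a
 * placeholder slot for the saved value.  The bare counts (0x3, 0x5) below
 * appear to delimit sub-lists consumed by the RLC ucode.
 */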
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

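/* Same pair encoding as the spectre list above, sized for Kalindi's
 * smaller GPU configuration.
 */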
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

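/*
 * The "golden" tables below are {register, and_mask, or_mask} triplets
 * applied by radeon_program_register_sequence(): each register is
 * read-modify-written with the masked value, or written directly when
 * the mask is 0xffffffff.
 */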
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

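/*
 * MC io debug {address, value} pairs, written through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA while the MC ucode is
 * loaded in ci_mc_load_microcode().
 */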
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};

1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
 * Switches the currently active register instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853 			     MEID(me & 0x3) |
1854 			     VMID(vmid & 0xf) |
1855 			     QUEUEID(queue & 0x7));
1856 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
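
/*
 * Usage sketch (assumption: callers serialize on rdev->srbm_mutex, as
 * the compute queue init paths do):
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the per-queue CP_HQD_* registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 *
 * Selecting back to 0/0/0/0 restores the default instance.
 */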
1858 
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
1868 int ci_mc_load_microcode(struct radeon_device *rdev)
1869 {
1870 	const __be32 *fw_data = NULL;
1871 	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
1873 	u32 *io_mc_regs = NULL;
1874 	const __le32 *new_io_mc_regs = NULL;
1875 	int i, regs_size, ucode_size;
1876 
1877 	if (!rdev->mc_fw)
1878 		return -EINVAL;
1879 
1880 	if (rdev->new_fw) {
1881 		const struct mc_firmware_header_v1_0 *hdr =
1882 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883 
1884 		radeon_ucode_print_mc_hdr(&hdr->header);
1885 
1886 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887 		new_io_mc_regs = (const __le32 *)
1888 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890 		new_fw_data = (const __le32 *)
1891 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892 	} else {
1893 		ucode_size = rdev->mc_fw->size / 4;
1894 
1895 		switch (rdev->family) {
1896 		case CHIP_BONAIRE:
1897 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899 			break;
1900 		case CHIP_HAWAII:
1901 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1903 			break;
1904 		default:
1905 			return -EINVAL;
1906 		}
1907 		fw_data = (const __be32 *)rdev->mc_fw->data;
1908 	}
1909 
1910 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911 
1912 	if (running == 0) {
1917 
1918 		/* reset the engine and set to writable */
1919 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921 
1922 		/* load mc io regs */
1923 		for (i = 0; i < regs_size; i++) {
1924 			if (rdev->new_fw) {
1925 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927 			} else {
1928 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930 			}
1931 		}
1932 
1933 		tmp = RREG32(MC_SEQ_MISC0);
1934 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939 		}
1940 
1941 		/* load the MC ucode */
1942 		for (i = 0; i < ucode_size; i++) {
1943 			if (rdev->new_fw)
1944 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945 			else
1946 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947 		}
1948 
1949 		/* put the engine back into the active state */
1950 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953 
1954 		/* wait for training to complete */
1955 		for (i = 0; i < rdev->usec_timeout; i++) {
1956 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957 				break;
1958 			udelay(1);
1959 		}
1960 		for (i = 0; i < rdev->usec_timeout; i++) {
1961 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962 				break;
1963 			udelay(1);
1964 		}
1968 	}
1969 
1970 	return 0;
1971 }
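
/*
 * Layout sketch of the wrapped MC firmware consumed above (assumption,
 * mirroring struct mc_firmware_header_v1_0 in radeon_ucode.h): the
 * common header carries byte offsets to the IO debug pairs and to the
 * ucode proper, so the blob is walked as:
 *
 *	hdr   = (const struct mc_firmware_header_v1_0 *)fw->data;
 *	regs  = fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes);
 *	ucode = fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes);
 */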
1972 
1973 /**
1974  * cik_init_microcode - load ucode images from disk
1975  *
1976  * @rdev: radeon_device pointer
1977  *
1978  * Use the firmware interface to load the ucode images into
1979  * the driver (not loaded into hw).
1980  * Returns 0 on success, error on failure.
1981  */
1982 static int cik_init_microcode(struct radeon_device *rdev)
1983 {
1984 	const char *chip_name;
1985 	const char *new_chip_name;
1986 	size_t pfp_req_size, me_req_size, ce_req_size,
1987 		mec_req_size, rlc_req_size, mc_req_size = 0,
1988 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989 	char fw_name[30];
1990 	int new_fw = 0;
1991 	int err;
1992 	int num_fw;
1993 
1994 	DRM_DEBUG("\n");
1995 
1996 	switch (rdev->family) {
1997 	case CHIP_BONAIRE:
1998 		chip_name = "BONAIRE";
1999 		new_chip_name = "bonaire";
2000 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2002 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009 		num_fw = 8;
2010 		break;
2011 	case CHIP_HAWAII:
2012 		chip_name = "HAWAII";
2013 		new_chip_name = "hawaii";
2014 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023 		num_fw = 8;
2024 		break;
2025 	case CHIP_KAVERI:
2026 		chip_name = "KAVERI";
2027 		new_chip_name = "kaveri";
2028 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034 		num_fw = 7;
2035 		break;
2036 	case CHIP_KABINI:
2037 		chip_name = "KABINI";
2038 		new_chip_name = "kabini";
2039 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045 		num_fw = 6;
2046 		break;
2047 	case CHIP_MULLINS:
2048 		chip_name = "MULLINS";
2049 		new_chip_name = "mullins";
2050 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056 		num_fw = 6;
2057 		break;
	default:
		BUG();
2059 	}
2060 
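	/*
	 * num_fw is the number of images requested below for this asic;
	 * the check at the end compares it against the number of
	 * new-style images found so mixed generations are rejected.
	 */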
2061 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062 
2063 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065 	if (err) {
2066 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068 		if (err)
2069 			goto out;
2070 		if (rdev->pfp_fw->size != pfp_req_size) {
2071 			printk(KERN_ERR
2072 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073 			       rdev->pfp_fw->size, fw_name);
2074 			err = -EINVAL;
2075 			goto out;
2076 		}
2077 	} else {
2078 		err = radeon_ucode_validate(rdev->pfp_fw);
2079 		if (err) {
2080 			printk(KERN_ERR
2081 			       "cik_fw: validation failed for firmware \"%s\"\n",
2082 			       fw_name);
2083 			goto out;
2084 		} else {
2085 			new_fw++;
2086 		}
2087 	}
2088 
2089 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->me_fw->size != me_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
			goto out;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->me_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->ce_fw->size != ce_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
			goto out;
2126 		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->ce_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141 	if (err) {
2142 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144 		if (err)
2145 			goto out;
2146 		if (rdev->mec_fw->size != mec_req_size) {
2147 			printk(KERN_ERR
2148 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149 			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
			goto out;
2151 		}
2152 	} else {
2153 		err = radeon_ucode_validate(rdev->mec_fw);
2154 		if (err) {
2155 			printk(KERN_ERR
2156 			       "cik_fw: validation failed for firmware \"%s\"\n",
2157 			       fw_name);
2158 			goto out;
2159 		} else {
2160 			new_fw++;
2161 		}
2162 	}
2163 
2164 	if (rdev->family == CHIP_KAVERI) {
2165 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167 		if (err) {
2168 			goto out;
2169 		} else {
2170 			err = radeon_ucode_validate(rdev->mec2_fw);
2171 			if (err) {
2172 				goto out;
2173 			} else {
2174 				new_fw++;
2175 			}
2176 		}
2177 	}
2178 
2179 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181 	if (err) {
2182 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184 		if (err)
2185 			goto out;
2186 		if (rdev->rlc_fw->size != rlc_req_size) {
2187 			printk(KERN_ERR
2188 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189 			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
2191 		}
2192 	} else {
2193 		err = radeon_ucode_validate(rdev->rlc_fw);
2194 		if (err) {
2195 			printk(KERN_ERR
2196 			       "cik_fw: validation failed for firmware \"%s\"\n",
2197 			       fw_name);
2198 			goto out;
2199 		} else {
2200 			new_fw++;
2201 		}
2202 	}
2203 
2204 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206 	if (err) {
2207 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209 		if (err)
2210 			goto out;
2211 		if (rdev->sdma_fw->size != sdma_req_size) {
2212 			printk(KERN_ERR
2213 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214 			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
			goto out;
2216 		}
2217 	} else {
2218 		err = radeon_ucode_validate(rdev->sdma_fw);
2219 		if (err) {
2220 			printk(KERN_ERR
2221 			       "cik_fw: validation failed for firmware \"%s\"\n",
2222 			       fw_name);
2223 			goto out;
2224 		} else {
2225 			new_fw++;
2226 		}
2227 	}
2228 
2229 	/* No SMC, MC ucode on APUs */
2230 	if (!(rdev->flags & RADEON_IS_IGP)) {
2231 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233 		if (err) {
2234 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236 			if (err) {
2237 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239 				if (err)
2240 					goto out;
2241 			}
2242 			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)) {
2244 				printk(KERN_ERR
2245 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246 				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
				goto out;
2248 			}
2249 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250 		} else {
2251 			err = radeon_ucode_validate(rdev->mc_fw);
2252 			if (err) {
2253 				printk(KERN_ERR
2254 				       "cik_fw: validation failed for firmware \"%s\"\n",
2255 				       fw_name);
2256 				goto out;
2257 			} else {
2258 				new_fw++;
2259 			}
2260 		}
2261 
2262 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264 		if (err) {
2265 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267 			if (err) {
2268 				printk(KERN_ERR
2269 				       "smc: error loading firmware \"%s\"\n",
2270 				       fw_name);
2271 				release_firmware(rdev->smc_fw);
2272 				rdev->smc_fw = NULL;
2273 				err = 0;
2274 			} else if (rdev->smc_fw->size != smc_req_size) {
2275 				printk(KERN_ERR
2276 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277 				       rdev->smc_fw->size, fw_name);
2278 				err = -EINVAL;
2279 			}
2280 		} else {
2281 			err = radeon_ucode_validate(rdev->smc_fw);
2282 			if (err) {
2283 				printk(KERN_ERR
2284 				       "cik_fw: validation failed for firmware \"%s\"\n",
2285 				       fw_name);
2286 				goto out;
2287 			} else {
2288 				new_fw++;
2289 			}
2290 		}
2291 	}
2292 
2293 	if (new_fw == 0) {
2294 		rdev->new_fw = false;
2295 	} else if (new_fw < num_fw) {
2296 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297 		err = -EINVAL;
2298 	} else {
2299 		rdev->new_fw = true;
2300 	}
2301 
2302 out:
2303 	if (err) {
2304 		if (err != -EINVAL)
2305 			printk(KERN_ERR
2306 			       "cik_cp: Failed to load firmware \"%s\"\n",
2307 			       fw_name);
2308 		release_firmware(rdev->pfp_fw);
2309 		rdev->pfp_fw = NULL;
2310 		release_firmware(rdev->me_fw);
2311 		rdev->me_fw = NULL;
2312 		release_firmware(rdev->ce_fw);
2313 		rdev->ce_fw = NULL;
2314 		release_firmware(rdev->mec_fw);
2315 		rdev->mec_fw = NULL;
2316 		release_firmware(rdev->mec2_fw);
2317 		rdev->mec2_fw = NULL;
2318 		release_firmware(rdev->rlc_fw);
2319 		rdev->rlc_fw = NULL;
2320 		release_firmware(rdev->sdma_fw);
2321 		rdev->sdma_fw = NULL;
2322 		release_firmware(rdev->mc_fw);
2323 		rdev->mc_fw = NULL;
2324 		release_firmware(rdev->smc_fw);
2325 		rdev->smc_fw = NULL;
2326 	}
2327 	return err;
2328 }
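
/*
 * Naming/fallback sketch: each request above first tries the wrapped,
 * validated image (e.g. "radeon/bonaire_pfp.bin") and falls back to the
 * legacy headerless blob (e.g. "radeon/BONAIRE_pfp.bin").  The new_fw
 * count enforces all-or-nothing: mixing generations would leave the
 * CP/SDMA loaders guessing at blob layout, so it is rejected.
 */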
2329 
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
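 *
 * For example, tile_mode_array[10] below is the 2D-tiled display
 * mode: surfaces reference that one index instead of re-encoding
 * bank/pipe/split parameters individually.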
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346 	const u32 num_tile_mode_states = 32;
2347 	const u32 num_secondary_tile_mode_states = 16;
2348 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2349 	u32 num_pipe_configs;
2350 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2351 		rdev->config.cik.max_shader_engines;
2352 
2353 	switch (rdev->config.cik.mem_row_size_in_kb) {
2354 	case 1:
2355 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2356 		break;
2357 	case 2:
2358 	default:
2359 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2360 		break;
2361 	case 4:
2362 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2363 		break;
2364 	}
2365 
2366 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2367 	if (num_pipe_configs > 8)
2368 		num_pipe_configs = 16;
2369 
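	/*
	 * One block per pipe config below: each fills the 32-entry
	 * GB_TILE_MODE table and the 16-entry GB_MACROTILE_MODE table,
	 * differing mainly in PIPE_CONFIG and bank geometry.
	 */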
2370 	if (num_pipe_configs == 16) {
2371 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2372 			switch (reg_offset) {
2373 			case 0:
2374 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2378 				break;
2379 			case 1:
2380 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2384 				break;
2385 			case 2:
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2390 				break;
2391 			case 3:
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2396 				break;
2397 			case 4:
2398 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 						 TILE_SPLIT(split_equal_to_row_size));
2402 				break;
2403 			case 5:
2404 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407 				break;
2408 			case 6:
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2410 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2413 				break;
2414 			case 7:
2415 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2416 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2417 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 						 TILE_SPLIT(split_equal_to_row_size));
2419 				break;
2420 			case 8:
2421 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423 				break;
2424 			case 9:
2425 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2428 				break;
2429 			case 10:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			case 11:
2436 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 				break;
2441 			case 12:
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 				break;
2447 			case 13:
2448 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2451 				break;
2452 			case 14:
2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 				break;
2458 			case 16:
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2462 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 				break;
2464 			case 17:
2465 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2466 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2469 				break;
2470 			case 27:
2471 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2472 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2474 				break;
2475 			case 28:
2476 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 				break;
2481 			case 29:
2482 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2485 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2486 				break;
2487 			case 30:
2488 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 				break;
2493 			default:
2494 				gb_tile_moden = 0;
2495 				break;
2496 			}
2497 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2498 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2499 		}
2500 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2501 			switch (reg_offset) {
2502 			case 0:
2503 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506 						 NUM_BANKS(ADDR_SURF_16_BANK));
2507 				break;
2508 			case 1:
2509 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512 						 NUM_BANKS(ADDR_SURF_16_BANK));
2513 				break;
2514 			case 2:
2515 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518 						 NUM_BANKS(ADDR_SURF_16_BANK));
2519 				break;
2520 			case 3:
2521 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 						 NUM_BANKS(ADDR_SURF_16_BANK));
2525 				break;
2526 			case 4:
2527 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530 						 NUM_BANKS(ADDR_SURF_8_BANK));
2531 				break;
2532 			case 5:
2533 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536 						 NUM_BANKS(ADDR_SURF_4_BANK));
2537 				break;
2538 			case 6:
2539 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2542 						 NUM_BANKS(ADDR_SURF_2_BANK));
2543 				break;
2544 			case 8:
2545 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 						 NUM_BANKS(ADDR_SURF_16_BANK));
2549 				break;
2550 			case 9:
2551 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554 						 NUM_BANKS(ADDR_SURF_16_BANK));
2555 				break;
2556 			case 10:
2557 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560 						 NUM_BANKS(ADDR_SURF_16_BANK));
2561 				break;
2562 			case 11:
2563 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566 						 NUM_BANKS(ADDR_SURF_8_BANK));
2567 				break;
2568 			case 12:
2569 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2571 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572 						 NUM_BANKS(ADDR_SURF_4_BANK));
2573 				break;
2574 			case 13:
2575 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578 						 NUM_BANKS(ADDR_SURF_2_BANK));
2579 				break;
2580 			case 14:
2581 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2583 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2584 						 NUM_BANKS(ADDR_SURF_2_BANK));
2585 				break;
2586 			default:
2587 				gb_tile_moden = 0;
2588 				break;
2589 			}
2590 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2591 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2592 		}
2593 	} else if (num_pipe_configs == 8) {
2594 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2595 			switch (reg_offset) {
2596 			case 0:
2597 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2599 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2601 				break;
2602 			case 1:
2603 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2607 				break;
2608 			case 2:
2609 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2611 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2613 				break;
2614 			case 3:
2615 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2617 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2619 				break;
2620 			case 4:
2621 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2623 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624 						 TILE_SPLIT(split_equal_to_row_size));
2625 				break;
2626 			case 5:
2627 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2628 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2630 				break;
2631 			case 6:
2632 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2634 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2636 				break;
2637 			case 7:
2638 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2640 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 						 TILE_SPLIT(split_equal_to_row_size));
2642 				break;
2643 			case 8:
2644 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646 				break;
2647 			case 9:
2648 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2649 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2651 				break;
2652 			case 10:
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2655 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 				break;
2658 			case 11:
2659 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2660 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663 				break;
2664 			case 12:
2665 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2667 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669 				break;
2670 			case 13:
2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2672 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2673 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2674 				break;
2675 			case 14:
2676 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680 				break;
2681 			case 16:
2682 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686 				break;
2687 			case 17:
2688 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2690 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692 				break;
2693 			case 27:
2694 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2697 				break;
2698 			case 28:
2699 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2702 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703 				break;
2704 			case 29:
2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2707 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709 				break;
2710 			case 30:
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2714 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 				break;
2716 			default:
2717 				gb_tile_moden = 0;
2718 				break;
2719 			}
2720 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2721 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2722 		}
2723 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2724 			switch (reg_offset) {
2725 			case 0:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK));
2730 				break;
2731 			case 1:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK));
2736 				break;
2737 			case 2:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK));
2742 				break;
2743 			case 3:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747 						 NUM_BANKS(ADDR_SURF_16_BANK));
2748 				break;
2749 			case 4:
2750 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753 						 NUM_BANKS(ADDR_SURF_8_BANK));
2754 				break;
2755 			case 5:
2756 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759 						 NUM_BANKS(ADDR_SURF_4_BANK));
2760 				break;
2761 			case 6:
2762 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765 						 NUM_BANKS(ADDR_SURF_2_BANK));
2766 				break;
2767 			case 8:
2768 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2770 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771 						 NUM_BANKS(ADDR_SURF_16_BANK));
2772 				break;
2773 			case 9:
2774 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777 						 NUM_BANKS(ADDR_SURF_16_BANK));
2778 				break;
2779 			case 10:
2780 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783 						 NUM_BANKS(ADDR_SURF_16_BANK));
2784 				break;
2785 			case 11:
2786 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789 						 NUM_BANKS(ADDR_SURF_16_BANK));
2790 				break;
2791 			case 12:
2792 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2795 						 NUM_BANKS(ADDR_SURF_8_BANK));
2796 				break;
2797 			case 13:
2798 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2801 						 NUM_BANKS(ADDR_SURF_4_BANK));
2802 				break;
2803 			case 14:
2804 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2807 						 NUM_BANKS(ADDR_SURF_2_BANK));
2808 				break;
2809 			default:
2810 				gb_tile_moden = 0;
2811 				break;
2812 			}
2813 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2814 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2815 		}
2816 	} else if (num_pipe_configs == 4) {
2817 		if (num_rbs == 4) {
2818 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2819 				switch (reg_offset) {
2820 				case 0:
2821 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2823 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2825 					break;
2826 				case 1:
2827 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2831 					break;
2832 				case 2:
2833 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2835 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2837 					break;
2838 				case 3:
2839 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2841 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2843 					break;
2844 				case 4:
2845 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2847 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848 							 TILE_SPLIT(split_equal_to_row_size));
2849 					break;
2850 				case 5:
2851 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854 					break;
2855 				case 6:
2856 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2858 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2860 					break;
2861 				case 7:
2862 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2864 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 							 TILE_SPLIT(split_equal_to_row_size));
2866 					break;
2867 				case 8:
2868 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2869 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2870 					break;
2871 				case 9:
2872 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2875 					break;
2876 				case 10:
2877 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881 					break;
2882 				case 11:
2883 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2885 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887 					break;
2888 				case 12:
2889 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2891 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893 					break;
2894 				case 13:
2895 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2898 					break;
2899 				case 14:
2900 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904 					break;
2905 				case 16:
2906 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2909 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910 					break;
2911 				case 17:
2912 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916 					break;
2917 				case 27:
2918 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2921 					break;
2922 				case 28:
2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2925 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 					break;
2928 				case 29:
2929 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933 					break;
2934 				case 30:
2935 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2936 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2937 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2938 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 					break;
2940 				default:
2941 					gb_tile_moden = 0;
2942 					break;
2943 				}
2944 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2945 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2946 			}
2947 		} else if (num_rbs < 4) {
2948 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2949 				switch (reg_offset) {
2950 				case 0:
2951 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2953 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2955 					break;
2956 				case 1:
2957 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2959 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2961 					break;
2962 				case 2:
2963 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2965 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2967 					break;
2968 				case 3:
2969 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2970 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2971 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2972 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2973 					break;
2974 				case 4:
2975 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2977 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978 							 TILE_SPLIT(split_equal_to_row_size));
2979 					break;
2980 				case 5:
2981 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2982 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2984 					break;
2985 				case 6:
2986 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2988 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2990 					break;
2991 				case 7:
2992 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2994 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2995 							 TILE_SPLIT(split_equal_to_row_size));
2996 					break;
2997 				case 8:
2998 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
3000 					break;
3001 				case 9:
3002 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3005 					break;
3006 				case 10:
3007 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3009 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011 					break;
3012 				case 11:
3013 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3016 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 					break;
3018 				case 12:
3019 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3020 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3022 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3023 					break;
3024 				case 13:
3025 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3026 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3027 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3028 					break;
3029 				case 14:
3030 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3031 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3033 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034 					break;
3035 				case 16:
3036 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3039 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040 					break;
3041 				case 17:
3042 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3043 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3045 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046 					break;
3047 				case 27:
3048 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3049 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3050 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3051 					break;
3052 				case 28:
3053 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3054 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3056 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057 					break;
3058 				case 29:
3059 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3061 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3062 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3063 					break;
3064 				case 30:
3065 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3066 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3067 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3068 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 					break;
3070 				default:
3071 					gb_tile_moden = 0;
3072 					break;
3073 				}
3074 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3075 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3076 			}
3077 		}
3078 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3079 			switch (reg_offset) {
3080 			case 0:
3081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084 						 NUM_BANKS(ADDR_SURF_16_BANK));
3085 				break;
3086 			case 1:
3087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090 						 NUM_BANKS(ADDR_SURF_16_BANK));
3091 				break;
3092 			case 2:
3093 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096 						 NUM_BANKS(ADDR_SURF_16_BANK));
3097 				break;
3098 			case 3:
3099 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3102 						 NUM_BANKS(ADDR_SURF_16_BANK));
3103 				break;
3104 			case 4:
3105 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108 						 NUM_BANKS(ADDR_SURF_16_BANK));
3109 				break;
3110 			case 5:
3111 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3114 						 NUM_BANKS(ADDR_SURF_8_BANK));
3115 				break;
3116 			case 6:
3117 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120 						 NUM_BANKS(ADDR_SURF_4_BANK));
3121 				break;
3122 			case 8:
3123 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3124 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3125 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126 						 NUM_BANKS(ADDR_SURF_16_BANK));
3127 				break;
3128 			case 9:
3129 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3131 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132 						 NUM_BANKS(ADDR_SURF_16_BANK));
3133 				break;
3134 			case 10:
3135 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138 						 NUM_BANKS(ADDR_SURF_16_BANK));
3139 				break;
3140 			case 11:
3141 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3143 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144 						 NUM_BANKS(ADDR_SURF_16_BANK));
3145 				break;
3146 			case 12:
3147 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150 						 NUM_BANKS(ADDR_SURF_16_BANK));
3151 				break;
3152 			case 13:
3153 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3156 						 NUM_BANKS(ADDR_SURF_8_BANK));
3157 				break;
3158 			case 14:
3159 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162 						 NUM_BANKS(ADDR_SURF_4_BANK));
3163 				break;
3164 			default:
3165 				gb_tile_moden = 0;
3166 				break;
3167 			}
3168 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3169 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3170 		}
3171 	} else if (num_pipe_configs == 2) {
3172 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3173 			switch (reg_offset) {
3174 			case 0:
3175 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3177 						 PIPE_CONFIG(ADDR_SURF_P2) |
3178 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3179 				break;
3180 			case 1:
3181 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3183 						 PIPE_CONFIG(ADDR_SURF_P2) |
3184 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3185 				break;
3186 			case 2:
3187 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3188 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3189 						 PIPE_CONFIG(ADDR_SURF_P2) |
3190 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3191 				break;
3192 			case 3:
3193 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3194 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3195 						 PIPE_CONFIG(ADDR_SURF_P2) |
3196 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3197 				break;
3198 			case 4:
3199 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3201 						 PIPE_CONFIG(ADDR_SURF_P2) |
3202 						 TILE_SPLIT(split_equal_to_row_size));
3203 				break;
3204 			case 5:
3205 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206 						 PIPE_CONFIG(ADDR_SURF_P2) |
3207 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3208 				break;
3209 			case 6:
3210 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3212 						 PIPE_CONFIG(ADDR_SURF_P2) |
3213 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3214 				break;
3215 			case 7:
3216 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3217 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3218 						 PIPE_CONFIG(ADDR_SURF_P2) |
3219 						 TILE_SPLIT(split_equal_to_row_size));
3220 				break;
3221 			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P2));
3224 				break;
3225 			case 9:
3226 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3228 						 PIPE_CONFIG(ADDR_SURF_P2));
3229 				break;
3230 			case 10:
3231 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3233 						 PIPE_CONFIG(ADDR_SURF_P2) |
3234 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 				break;
3236 			case 11:
3237 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239 						 PIPE_CONFIG(ADDR_SURF_P2) |
3240 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3241 				break;
3242 			case 12:
3243 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3244 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245 						 PIPE_CONFIG(ADDR_SURF_P2) |
3246 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247 				break;
3248 			case 13:
3249 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250 						 PIPE_CONFIG(ADDR_SURF_P2) |
3251 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3252 				break;
3253 			case 14:
3254 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256 						 PIPE_CONFIG(ADDR_SURF_P2) |
3257 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258 				break;
3259 			case 16:
3260 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262 						 PIPE_CONFIG(ADDR_SURF_P2) |
3263 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264 				break;
3265 			case 17:
3266 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3267 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268 						 PIPE_CONFIG(ADDR_SURF_P2) |
3269 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270 				break;
3271 			case 27:
3272 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3274 						 PIPE_CONFIG(ADDR_SURF_P2));
3275 				break;
3276 			case 28:
3277 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3278 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279 						 PIPE_CONFIG(ADDR_SURF_P2) |
3280 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3281 				break;
3282 			case 29:
3283 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3285 						 PIPE_CONFIG(ADDR_SURF_P2) |
3286 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287 				break;
3288 			case 30:
3289 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3290 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3291 						 PIPE_CONFIG(ADDR_SURF_P2) |
3292 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293 				break;
3294 			default:
3295 				gb_tile_moden = 0;
3296 				break;
3297 			}
3298 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3299 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3300 		}
3301 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3302 			switch (reg_offset) {
3303 			case 0:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 1:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313 						 NUM_BANKS(ADDR_SURF_16_BANK));
3314 				break;
3315 			case 2:
3316 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 						 NUM_BANKS(ADDR_SURF_16_BANK));
3320 				break;
3321 			case 3:
3322 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3325 						 NUM_BANKS(ADDR_SURF_16_BANK));
3326 				break;
3327 			case 4:
3328 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331 						 NUM_BANKS(ADDR_SURF_16_BANK));
3332 				break;
3333 			case 5:
3334 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337 						 NUM_BANKS(ADDR_SURF_16_BANK));
3338 				break;
3339 			case 6:
3340 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343 						 NUM_BANKS(ADDR_SURF_8_BANK));
3344 				break;
3345 			case 8:
3346 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3347 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3348 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349 						 NUM_BANKS(ADDR_SURF_16_BANK));
3350 				break;
3351 			case 9:
3352 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3353 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355 						 NUM_BANKS(ADDR_SURF_16_BANK));
3356 				break;
3357 			case 10:
3358 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3359 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3360 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361 						 NUM_BANKS(ADDR_SURF_16_BANK));
3362 				break;
3363 			case 11:
3364 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367 						 NUM_BANKS(ADDR_SURF_16_BANK));
3368 				break;
3369 			case 12:
3370 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373 						 NUM_BANKS(ADDR_SURF_16_BANK));
3374 				break;
3375 			case 13:
3376 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379 						 NUM_BANKS(ADDR_SURF_16_BANK));
3380 				break;
3381 			case 14:
3382 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3384 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3385 						 NUM_BANKS(ADDR_SURF_8_BANK));
3386 				break;
3387 			default:
3388 				gb_tile_moden = 0;
3389 				break;
3390 			}
3391 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3392 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3393 		}
3394 	} else
3395 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3396 }
3397 
3398 /**
3399  * cik_select_se_sh - select which SE, SH to address
3400  *
3401  * @rdev: radeon_device pointer
3402  * @se_num: shader engine to address
3403  * @sh_num: sh block to address
3404  *
3405  * Select which SE, SH combinations to address. Certain
3406  * registers are instanced per SE or SH.  0xffffffff means
3407  * broadcast to all SEs or SHs (CIK).
3408  */
3409 static void cik_select_se_sh(struct radeon_device *rdev,
3410 			     u32 se_num, u32 sh_num)
3411 {
3412 	u32 data = INSTANCE_BROADCAST_WRITES;
3413 
3414 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3415 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3416 	else if (se_num == 0xffffffff)
3417 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3418 	else if (sh_num == 0xffffffff)
3419 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3420 	else
3421 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3422 	WREG32(GRBM_GFX_INDEX, data);
3423 }
3424 
3425 /**
3426  * cik_create_bitmask - create a bitmask
3427  *
3428  * @bit_width: length of the mask
3429  *
3430  * Create a variable-length bit mask (CIK).
3431  * Returns the bitmask.
3432  */
3433 static u32 cik_create_bitmask(u32 bit_width)
3434 {
3435 	u32 i, mask = 0;
3436 
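	/* Builds a mask of bit_width ones, e.g. bit_width = 4 -> 0xf.  The
	 * loop form avoids the undefined behaviour of (1 << 32) that a
	 * plain shift would hit when bit_width == 32.
	 */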
3437 	for (i = 0; i < bit_width; i++) {
3438 		mask <<= 1;
3439 		mask |= 1;
3440 	}
3441 	return mask;
3442 }
3443 
3444 /**
3445  * cik_get_rb_disabled - computes the mask of disabled RBs
3446  *
3447  * @rdev: radeon_device pointer
3448  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3450  * @sh_per_se: number of SH blocks per SE for the asic
3451  *
3452  * Calculates the bitmask of disabled RBs (CIK).
3453  * Returns the disabled RB bitmask.
3454  */
3455 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3456 			      u32 max_rb_num_per_se,
3457 			      u32 sh_per_se)
3458 {
3459 	u32 data, mask;
3460 
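	/* CC_RB_BACKEND_DISABLE describes RBs fused off in hardware (bit 0
	 * appears to gate whether that mask is valid), while
	 * GC_USER_RB_BACKEND_DISABLE describes RBs disabled by the
	 * driver/VBIOS; OR them to get every disabled RB for this SE/SH.
	 */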
3461 	data = RREG32(CC_RB_BACKEND_DISABLE);
3462 	if (data & 1)
3463 		data &= BACKEND_DISABLE_MASK;
3464 	else
3465 		data = 0;
3466 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3467 
3468 	data >>= BACKEND_DISABLE_SHIFT;
3469 
3470 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3471 
3472 	return data & mask;
3473 }
3474 
3475 /**
3476  * cik_setup_rb - setup the RBs on the asic
3477  *
3478  * @rdev: radeon_device pointer
3479  * @se_num: number of SEs (shader engines) for the asic
3480  * @sh_per_se: number of SH blocks per SE for the asic
3481  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3482  *
3483  * Configures per-SE/SH RB registers (CIK).
3484  */
3485 static void cik_setup_rb(struct radeon_device *rdev,
3486 			 u32 se_num, u32 sh_per_se,
3487 			 u32 max_rb_num_per_se)
3488 {
3489 	int i, j;
3490 	u32 data, mask;
3491 	u32 disabled_rbs = 0;
3492 	u32 enabled_rbs = 0;
3493 
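
	/* Collect the per-SH disabled RB mask from every SE/SH pair and
	 * pack the results into one word; each SH contributes a fixed-width
	 * bit field (wider on Hawaii, which has more RBs per SH).
	 */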
3494 	mutex_lock(&rdev->grbm_idx_mutex);
3495 	for (i = 0; i < se_num; i++) {
3496 		for (j = 0; j < sh_per_se; j++) {
3497 			cik_select_se_sh(rdev, i, j);
3498 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3499 			if (rdev->family == CHIP_HAWAII)
3500 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3501 			else
3502 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3503 		}
3504 	}
3505 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3506 	mutex_unlock(&rdev->grbm_idx_mutex);
3507 
3508 	mask = 1;
3509 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3510 		if (!(disabled_rbs & mask))
3511 			enabled_rbs |= mask;
3512 		mask <<= 1;
3513 	}
3514 
3515 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3516 
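	/* Program PA_SC_RASTER_CONFIG for each SE: consume two enabled_rbs
	 * bits per SH and pick an RB_MAP value based on which RBs of that
	 * pair are actually present.
	 */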
3517 	mutex_lock(&rdev->grbm_idx_mutex);
3518 	for (i = 0; i < se_num; i++) {
3519 		cik_select_se_sh(rdev, i, 0xffffffff);
3520 		data = 0;
3521 		for (j = 0; j < sh_per_se; j++) {
3522 			switch (enabled_rbs & 3) {
3523 			case 0:
3524 				if (j == 0)
3525 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3526 				else
3527 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3528 				break;
3529 			case 1:
3530 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3531 				break;
3532 			case 2:
3533 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3534 				break;
3535 			case 3:
3536 			default:
3537 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3538 				break;
3539 			}
3540 			enabled_rbs >>= 2;
3541 		}
3542 		WREG32(PA_SC_RASTER_CONFIG, data);
3543 	}
3544 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3545 	mutex_unlock(&rdev->grbm_idx_mutex);
3546 }
3547 
3548 /**
3549  * cik_gpu_init - setup the 3D engine
3550  *
3551  * @rdev: radeon_device pointer
3552  *
3553  * Configures the 3D engine and tiling configuration
3554  * registers so that the 3D engine is usable.
3555  */
3556 static void cik_gpu_init(struct radeon_device *rdev)
3557 {
3558 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3559 	u32 mc_shared_chmap, mc_arb_ramcfg;
3560 	u32 hdp_host_path_cntl;
3561 	u32 tmp;
3562 	int i, j;
3563 
3564 	switch (rdev->family) {
3565 	case CHIP_BONAIRE:
3566 		rdev->config.cik.max_shader_engines = 2;
3567 		rdev->config.cik.max_tile_pipes = 4;
3568 		rdev->config.cik.max_cu_per_sh = 7;
3569 		rdev->config.cik.max_sh_per_se = 1;
3570 		rdev->config.cik.max_backends_per_se = 2;
3571 		rdev->config.cik.max_texture_channel_caches = 4;
3572 		rdev->config.cik.max_gprs = 256;
3573 		rdev->config.cik.max_gs_threads = 32;
3574 		rdev->config.cik.max_hw_contexts = 8;
3575 
3576 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3577 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3578 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3579 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3580 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3581 		break;
3582 	case CHIP_HAWAII:
3583 		rdev->config.cik.max_shader_engines = 4;
3584 		rdev->config.cik.max_tile_pipes = 16;
3585 		rdev->config.cik.max_cu_per_sh = 11;
3586 		rdev->config.cik.max_sh_per_se = 1;
3587 		rdev->config.cik.max_backends_per_se = 4;
3588 		rdev->config.cik.max_texture_channel_caches = 16;
3589 		rdev->config.cik.max_gprs = 256;
3590 		rdev->config.cik.max_gs_threads = 32;
3591 		rdev->config.cik.max_hw_contexts = 8;
3592 
3593 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3594 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3595 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3596 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3597 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3598 		break;
3599 	case CHIP_KAVERI:
3600 		rdev->config.cik.max_shader_engines = 1;
3601 		rdev->config.cik.max_tile_pipes = 4;
3602 		if ((rdev->pdev->device == 0x1304) ||
3603 		    (rdev->pdev->device == 0x1305) ||
3604 		    (rdev->pdev->device == 0x130C) ||
3605 		    (rdev->pdev->device == 0x130F) ||
3606 		    (rdev->pdev->device == 0x1310) ||
3607 		    (rdev->pdev->device == 0x1311) ||
3608 		    (rdev->pdev->device == 0x131C)) {
3609 			rdev->config.cik.max_cu_per_sh = 8;
3610 			rdev->config.cik.max_backends_per_se = 2;
3611 		} else if ((rdev->pdev->device == 0x1309) ||
3612 			   (rdev->pdev->device == 0x130A) ||
3613 			   (rdev->pdev->device == 0x130D) ||
3614 			   (rdev->pdev->device == 0x1313) ||
3615 			   (rdev->pdev->device == 0x131D)) {
3616 			rdev->config.cik.max_cu_per_sh = 6;
3617 			rdev->config.cik.max_backends_per_se = 2;
3618 		} else if ((rdev->pdev->device == 0x1306) ||
3619 			   (rdev->pdev->device == 0x1307) ||
3620 			   (rdev->pdev->device == 0x130B) ||
3621 			   (rdev->pdev->device == 0x130E) ||
3622 			   (rdev->pdev->device == 0x1315) ||
3623 			   (rdev->pdev->device == 0x1318) ||
3624 			   (rdev->pdev->device == 0x131B)) {
3625 			rdev->config.cik.max_cu_per_sh = 4;
3626 			rdev->config.cik.max_backends_per_se = 1;
3627 		} else {
3628 			rdev->config.cik.max_cu_per_sh = 3;
3629 			rdev->config.cik.max_backends_per_se = 1;
3630 		}
3631 		rdev->config.cik.max_sh_per_se = 1;
3632 		rdev->config.cik.max_texture_channel_caches = 4;
3633 		rdev->config.cik.max_gprs = 256;
3634 		rdev->config.cik.max_gs_threads = 16;
3635 		rdev->config.cik.max_hw_contexts = 8;
3636 
3637 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3638 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3639 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3640 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3641 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3642 		break;
3643 	case CHIP_KABINI:
3644 	case CHIP_MULLINS:
3645 	default:
3646 		rdev->config.cik.max_shader_engines = 1;
3647 		rdev->config.cik.max_tile_pipes = 2;
3648 		rdev->config.cik.max_cu_per_sh = 2;
3649 		rdev->config.cik.max_sh_per_se = 1;
3650 		rdev->config.cik.max_backends_per_se = 1;
3651 		rdev->config.cik.max_texture_channel_caches = 2;
3652 		rdev->config.cik.max_gprs = 256;
3653 		rdev->config.cik.max_gs_threads = 16;
3654 		rdev->config.cik.max_hw_contexts = 8;
3655 
3656 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3657 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3658 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3659 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3660 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3661 		break;
3662 	}
3663 
3664 	/* Initialize HDP */
3665 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3666 		WREG32((0x2c14 + j), 0x00000000);
3667 		WREG32((0x2c18 + j), 0x00000000);
3668 		WREG32((0x2c1c + j), 0x00000000);
3669 		WREG32((0x2c20 + j), 0x00000000);
3670 		WREG32((0x2c24 + j), 0x00000000);
3671 	}
3672 
3673 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3674 	WREG32(SRBM_INT_CNTL, 0x1);
3675 	WREG32(SRBM_INT_ACK, 0x1);
3676 
3677 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3678 
3679 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3680 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3681 
3682 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3683 	rdev->config.cik.mem_max_burst_length_bytes = 256;
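	/* The row size below works out to 4 * 2^(8 + NOOFCOLS) bytes,
	 * capped at 4KB.
	 */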
3684 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3685 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3686 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3687 		rdev->config.cik.mem_row_size_in_kb = 4;
3688 	/* XXX use MC settings? */
3689 	rdev->config.cik.shader_engine_tile_size = 32;
3690 	rdev->config.cik.num_gpus = 1;
3691 	rdev->config.cik.multi_gpu_tile_size = 64;
3692 
3693 	/* fix up row size */
3694 	gb_addr_config &= ~ROW_SIZE_MASK;
3695 	switch (rdev->config.cik.mem_row_size_in_kb) {
3696 	case 1:
3697 	default:
3698 		gb_addr_config |= ROW_SIZE(0);
3699 		break;
3700 	case 2:
3701 		gb_addr_config |= ROW_SIZE(1);
3702 		break;
3703 	case 4:
3704 		gb_addr_config |= ROW_SIZE(2);
3705 		break;
3706 	}
3707 
3708 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3709 	 * not have bank info, so create a custom tiling dword.
3710 	 * bits 3:0   num_pipes
3711 	 * bits 7:4   num_banks
3712 	 * bits 11:8  group_size
3713 	 * bits 15:12 row_size
3714 	 */
3715 	rdev->config.cik.tile_config = 0;
3716 	switch (rdev->config.cik.num_tile_pipes) {
3717 	case 1:
3718 		rdev->config.cik.tile_config |= (0 << 0);
3719 		break;
3720 	case 2:
3721 		rdev->config.cik.tile_config |= (1 << 0);
3722 		break;
3723 	case 4:
3724 		rdev->config.cik.tile_config |= (2 << 0);
3725 		break;
3726 	case 8:
3727 	default:
3728 		/* XXX what about 12? */
3729 		rdev->config.cik.tile_config |= (3 << 0);
3730 		break;
3731 	}
3732 	rdev->config.cik.tile_config |=
3733 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3734 	rdev->config.cik.tile_config |=
3735 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3736 	rdev->config.cik.tile_config |=
3737 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3738 
3739 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3740 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3741 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3742 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3743 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3744 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3745 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3746 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3747 
3748 	cik_tiling_mode_table_init(rdev);
3749 
3750 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3751 		     rdev->config.cik.max_sh_per_se,
3752 		     rdev->config.cik.max_backends_per_se);
3753 
3754 	rdev->config.cik.active_cus = 0;
3755 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3756 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3757 			rdev->config.cik.active_cus +=
3758 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3759 		}
3760 	}
3761 
3762 	/* set HW defaults for 3D engine */
3763 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3764 
3765 	mutex_lock(&rdev->grbm_idx_mutex);
3766 	/*
3767 	 * make sure that the following register writes will be broadcast
3768 	 * to all the shaders
3769 	 */
3770 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3771 	WREG32(SX_DEBUG_1, 0x20);
3772 
3773 	WREG32(TA_CNTL_AUX, 0x00010000);
3774 
3775 	tmp = RREG32(SPI_CONFIG_CNTL);
3776 	tmp |= 0x03000000;
3777 	WREG32(SPI_CONFIG_CNTL, tmp);
3778 
3779 	WREG32(SQ_CONFIG, 1);
3780 
3781 	WREG32(DB_DEBUG, 0);
3782 
3783 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3784 	tmp |= 0x00000400;
3785 	WREG32(DB_DEBUG2, tmp);
3786 
3787 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3788 	tmp |= 0x00020200;
3789 	WREG32(DB_DEBUG3, tmp);
3790 
3791 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3792 	tmp |= 0x00018208;
3793 	WREG32(CB_HW_CONTROL, tmp);
3794 
3795 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3796 
3797 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3798 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3799 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3800 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3801 
3802 	WREG32(VGT_NUM_INSTANCES, 1);
3803 
3804 	WREG32(CP_PERFMON_CNTL, 0);
3805 
3806 	WREG32(SQ_CONFIG, 0);
3807 
3808 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3809 					  FORCE_EOV_MAX_REZ_CNT(255)));
3810 
3811 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3812 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3813 
3814 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3815 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3816 
3817 	tmp = RREG32(HDP_MISC_CNTL);
3818 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3819 	WREG32(HDP_MISC_CNTL, tmp);
3820 
3821 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3822 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3823 
3824 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3825 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3826 	mutex_unlock(&rdev->grbm_idx_mutex);
3827 
3828 	udelay(50);
3829 }
3830 
3831 /*
3832  * GPU scratch register helper functions.
3833  */
3834 /**
3835  * cik_scratch_init - setup driver info for CP scratch regs
3836  *
3837  * @rdev: radeon_device pointer
3838  *
3839  * Set up the number and offset of the CP scratch registers.
3840  * NOTE: use of CP scratch registers is a legacy interface and
3841  * is not used by default on newer asics (r6xx+).  On newer asics,
3842  * memory buffers are used for fences rather than scratch regs.
3843  */
3844 static void cik_scratch_init(struct radeon_device *rdev)
3845 {
3846 	int i;
3847 
3848 	rdev->scratch.num_reg = 7;
3849 	rdev->scratch.reg_base = SCRATCH_REG0;
3850 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3851 		rdev->scratch.free[i] = true;
3852 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3853 	}
3854 }
3855 
3856 /**
3857  * cik_ring_test - basic gfx ring test
3858  *
3859  * @rdev: radeon_device pointer
3860  * @ring: radeon_ring structure holding ring information
3861  *
3862  * Allocate a scratch register and write to it using the gfx ring (CIK).
3863  * Provides a basic gfx ring test to verify that the ring is working.
3864  * Used by cik_cp_gfx_resume().
3865  * Returns 0 on success, error on failure.
3866  */
3867 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3868 {
3869 	uint32_t scratch;
3870 	uint32_t tmp = 0;
3871 	unsigned i;
3872 	int r;
3873 
3874 	r = radeon_scratch_get(rdev, &scratch);
3875 	if (r) {
3876 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3877 		return r;
3878 	}
3879 	WREG32(scratch, 0xCAFEDEAD);
3880 	r = radeon_ring_lock(rdev, ring, 3);
3881 	if (r) {
3882 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3883 		radeon_scratch_free(rdev, scratch);
3884 		return r;
3885 	}
3886 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3887 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3888 	radeon_ring_write(ring, 0xDEADBEEF);
3889 	radeon_ring_unlock_commit(rdev, ring, false);
3890 
3891 	for (i = 0; i < rdev->usec_timeout; i++) {
3892 		tmp = RREG32(scratch);
3893 		if (tmp == 0xDEADBEEF)
3894 			break;
3895 		DRM_UDELAY(1);
3896 	}
3897 	if (i < rdev->usec_timeout) {
3898 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3899 	} else {
3900 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3901 			  ring->idx, scratch, tmp);
3902 		r = -EINVAL;
3903 	}
3904 	radeon_scratch_free(rdev, scratch);
3905 	return r;
3906 }
3907 
3908 /**
3909  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3910  *
3911  * @rdev: radeon_device pointer
3912  * @ridx: radeon ring index
3913  *
3914  * Emits an hdp flush on the cp.
3915  */
3916 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3917 				       int ridx)
3918 {
3919 	struct radeon_ring *ring = &rdev->ring[ridx];
3920 	u32 ref_and_mask;
3921 
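	/* Pick the GPU_HDP_FLUSH_REQ/DONE bit for this ring: each compute
	 * pipe has its own CPn bit (starting at CP2 or CP6 depending on
	 * the MEC), while the gfx ring uses CP0.
	 */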
3922 	switch (ring->idx) {
3923 	case CAYMAN_RING_TYPE_CP1_INDEX:
3924 	case CAYMAN_RING_TYPE_CP2_INDEX:
3925 	default:
3926 		switch (ring->me) {
3927 		case 0:
3928 			ref_and_mask = CP2 << ring->pipe;
3929 			break;
3930 		case 1:
3931 			ref_and_mask = CP6 << ring->pipe;
3932 			break;
3933 		default:
3934 			return;
3935 		}
3936 		break;
3937 	case RADEON_RING_TYPE_GFX_INDEX:
3938 		ref_and_mask = CP0;
3939 		break;
3940 	}
3941 
3942 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3943 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3944 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3945 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3946 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3947 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3948 	radeon_ring_write(ring, ref_and_mask);
3949 	radeon_ring_write(ring, ref_and_mask);
3950 	radeon_ring_write(ring, 0x20); /* poll interval */
3951 }
3952 
3953 /**
3954  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3955  *
3956  * @rdev: radeon_device pointer
3957  * @fence: radeon fence object
3958  *
3959  * Emits a fence sequence number on the gfx ring and flushes
3960  * GPU caches.
3961  */
3962 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3963 			     struct radeon_fence *fence)
3964 {
3965 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3966 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3967 
3968 	/* Workaround for cache flush problems.  First send a dummy EOP
3969 	 * event down the pipe with a seq number one below the real fence.
3970 	 */
3971 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3972 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3973 				 EOP_TC_ACTION_EN |
3974 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3975 				 EVENT_INDEX(5)));
3976 	radeon_ring_write(ring, addr & 0xfffffffc);
3977 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3978 				DATA_SEL(1) | INT_SEL(0));
3979 	radeon_ring_write(ring, fence->seq - 1);
3980 	radeon_ring_write(ring, 0);
3981 
3982 	/* Then send the real EOP event down the pipe. */
3983 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
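	/* DATA_SEL(1) writes the 32-bit fence value when the event
	 * completes; INT_SEL(2) also raises an interrupt once the write
	 * has landed.
	 */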
3984 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3985 				 EOP_TC_ACTION_EN |
3986 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3987 				 EVENT_INDEX(5)));
3988 	radeon_ring_write(ring, addr & 0xfffffffc);
3989 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3990 	radeon_ring_write(ring, fence->seq);
3991 	radeon_ring_write(ring, 0);
3992 }
3993 
3994 /**
3995  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3996  *
3997  * @rdev: radeon_device pointer
3998  * @fence: radeon fence object
3999  *
4000  * Emits a fence sequence number on the compute ring and flushes
4001  * GPU caches.
4002  */
4003 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
4004 				 struct radeon_fence *fence)
4005 {
4006 	struct radeon_ring *ring = &rdev->ring[fence->ring];
4007 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4008 
4009 	/* RELEASE_MEM - flush caches, send int */
4010 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4011 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
4012 				 EOP_TC_ACTION_EN |
4013 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4014 				 EVENT_INDEX(5)));
4015 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
4016 	radeon_ring_write(ring, addr & 0xfffffffc);
4017 	radeon_ring_write(ring, upper_32_bits(addr));
4018 	radeon_ring_write(ring, fence->seq);
4019 	radeon_ring_write(ring, 0);
4020 }
4021 
4022 /**
4023  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
4024  *
4025  * @rdev: radeon_device pointer
4026  * @ring: radeon ring buffer object
4027  * @semaphore: radeon semaphore object
4028  * @emit_wait: Is this a semaphore wait?
4029  *
4030  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4031  * from running ahead of semaphore waits.
4032  */
4033 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4034 			     struct radeon_ring *ring,
4035 			     struct radeon_semaphore *semaphore,
4036 			     bool emit_wait)
4037 {
4038 	uint64_t addr = semaphore->gpu_addr;
4039 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4040 
4041 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4042 	radeon_ring_write(ring, lower_32_bits(addr));
4043 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4044 
4045 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4046 		/* Prevent the PFP from running ahead of the semaphore wait */
4047 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4048 		radeon_ring_write(ring, 0x0);
4049 	}
4050 
4051 	return true;
4052 }
4053 
4054 /**
4055  * cik_copy_cpdma - copy pages using the CP DMA engine
4056  *
4057  * @rdev: radeon_device pointer
4058  * @src_offset: src GPU address
4059  * @dst_offset: dst GPU address
4060  * @num_gpu_pages: number of GPU pages to xfer
4061  * @resv: reservation object to sync to
4062  *
4063  * Copy GPU pages using the CP DMA engine (CIK+).
4064  * Used by the radeon ttm implementation to move pages if
4065  * registered as the asic copy callback.
4066  */
4067 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4068 				    uint64_t src_offset, uint64_t dst_offset,
4069 				    unsigned num_gpu_pages,
4070 				    struct reservation_object *resv)
4071 {
4072 	struct radeon_fence *fence;
4073 	struct radeon_sync sync;
4074 	int ring_index = rdev->asic->copy.blit_ring_index;
4075 	struct radeon_ring *ring = &rdev->ring[ring_index];
4076 	u32 size_in_bytes, cur_size_in_bytes, control;
4077 	int i, num_loops;
4078 	int r = 0;
4079 
4080 	radeon_sync_create(&sync);
4081 
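	/* A single DMA_DATA packet can copy at most 0x1fffff bytes, so
	 * split the transfer into that many loops; each loop emits 7
	 * dwords, and the extra 18 cover the sync and fence packets.
	 */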
4082 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4083 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4084 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4085 	if (r) {
4086 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4087 		radeon_sync_free(rdev, &sync, NULL);
4088 		return ERR_PTR(r);
4089 	}
4090 
4091 	radeon_sync_resv(rdev, &sync, resv, false);
4092 	radeon_sync_rings(rdev, &sync, ring->idx);
4093 
4094 	for (i = 0; i < num_loops; i++) {
4095 		cur_size_in_bytes = size_in_bytes;
4096 		if (cur_size_in_bytes > 0x1fffff)
4097 			cur_size_in_bytes = 0x1fffff;
4098 		size_in_bytes -= cur_size_in_bytes;
4099 		control = 0;
4100 		if (size_in_bytes == 0)
4101 			control |= PACKET3_DMA_DATA_CP_SYNC;
4102 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4103 		radeon_ring_write(ring, control);
4104 		radeon_ring_write(ring, lower_32_bits(src_offset));
4105 		radeon_ring_write(ring, upper_32_bits(src_offset));
4106 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4107 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4108 		radeon_ring_write(ring, cur_size_in_bytes);
4109 		src_offset += cur_size_in_bytes;
4110 		dst_offset += cur_size_in_bytes;
4111 	}
4112 
4113 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4114 	if (r) {
4115 		radeon_ring_unlock_undo(rdev, ring);
4116 		radeon_sync_free(rdev, &sync, NULL);
4117 		return ERR_PTR(r);
4118 	}
4119 
4120 	radeon_ring_unlock_commit(rdev, ring, false);
4121 	radeon_sync_free(rdev, &sync, fence);
4122 
4123 	return fence;
4124 }
4125 
4126 /*
4127  * IB stuff
4128  */
4129 /**
4130  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4131  *
4132  * @rdev: radeon_device pointer
4133  * @ib: radeon indirect buffer object
4134  *
4135  * Emits a DE (drawing engine) or CE (constant engine) IB
4136  * on the gfx ring.  IBs are usually generated by userspace
4137  * acceleration drivers and submitted to the kernel for
4138  * scheduling on the ring.  This function schedules the IB
4139  * on the gfx ring for execution by the GPU.
4140  */
4141 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4142 {
4143 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4144 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4145 	u32 header, control = INDIRECT_BUFFER_VALID;
4146 
4147 	if (ib->is_const_ib) {
4148 		/* set switch buffer packet before const IB */
4149 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4150 		radeon_ring_write(ring, 0);
4151 
4152 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4153 	} else {
4154 		u32 next_rptr;
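		/* next_rptr points just past this IB packet: the dwords
		 * emitted for the rptr update below (3 or 5) plus 4 for
		 * the INDIRECT_BUFFER packet itself.
		 */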
4155 		if (ring->rptr_save_reg) {
4156 			next_rptr = ring->wptr + 3 + 4;
4157 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4158 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4159 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4160 			radeon_ring_write(ring, next_rptr);
4161 		} else if (rdev->wb.enabled) {
4162 			next_rptr = ring->wptr + 5 + 4;
4163 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4164 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4165 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4166 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4167 			radeon_ring_write(ring, next_rptr);
4168 		}
4169 
4170 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4171 	}
4172 
4173 	control |= ib->length_dw | (vm_id << 24);
4174 
4175 	radeon_ring_write(ring, header);
4176 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
4177 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4178 	radeon_ring_write(ring, control);
4179 }
4180 
4181 /**
4182  * cik_ib_test - basic gfx ring IB test
4183  *
4184  * @rdev: radeon_device pointer
4185  * @ring: radeon_ring structure holding ring information
4186  *
4187  * Allocate an IB and execute it on the gfx ring (CIK).
4188  * Provides a basic gfx ring test to verify that IBs are working.
4189  * Returns 0 on success, error on failure.
4190  */
4191 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4192 {
4193 	struct radeon_ib ib;
4194 	uint32_t scratch;
4195 	uint32_t tmp = 0;
4196 	unsigned i;
4197 	int r;
4198 
4199 	r = radeon_scratch_get(rdev, &scratch);
4200 	if (r) {
4201 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4202 		return r;
4203 	}
4204 	WREG32(scratch, 0xCAFEDEAD);
4205 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4206 	if (r) {
4207 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4208 		radeon_scratch_free(rdev, scratch);
4209 		return r;
4210 	}
4211 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4212 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4213 	ib.ptr[2] = 0xDEADBEEF;
4214 	ib.length_dw = 3;
4215 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4216 	if (r) {
4217 		radeon_scratch_free(rdev, scratch);
4218 		radeon_ib_free(rdev, &ib);
4219 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4220 		return r;
4221 	}
4222 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
4223 		RADEON_USEC_IB_TEST_TIMEOUT));
4224 	if (r < 0) {
4225 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4226 		radeon_scratch_free(rdev, scratch);
4227 		radeon_ib_free(rdev, &ib);
4228 		return r;
4229 	} else if (r == 0) {
4230 		DRM_ERROR("radeon: fence wait timed out.\n");
4231 		radeon_scratch_free(rdev, scratch);
4232 		radeon_ib_free(rdev, &ib);
4233 		return -ETIMEDOUT;
4234 	}
4235 	r = 0;
4236 	for (i = 0; i < rdev->usec_timeout; i++) {
4237 		tmp = RREG32(scratch);
4238 		if (tmp == 0xDEADBEEF)
4239 			break;
4240 		DRM_UDELAY(1);
4241 	}
4242 	if (i < rdev->usec_timeout) {
4243 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4244 	} else {
4245 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4246 			  scratch, tmp);
4247 		r = -EINVAL;
4248 	}
4249 	radeon_scratch_free(rdev, scratch);
4250 	radeon_ib_free(rdev, &ib);
4251 	return r;
4252 }
4253 
4254 /*
4255  * CP.
4256  * On CIK, gfx and compute now have independent command processors.
4257  *
4258  * GFX
4259  * Gfx consists of a single ring and can process both gfx jobs and
4260  * compute jobs.  The gfx CP consists of three microengines (ME):
4261  * PFP - Pre-Fetch Parser
4262  * ME - Micro Engine
4263  * CE - Constant Engine
4264  * The PFP and ME make up what is considered the Drawing Engine (DE).
4265  * The CE is an asynchronous engine used for updating buffer descriptors
4266  * used by the DE so that they can be loaded into cache in parallel
4267  * while the DE is processing state update packets.
4268  *
4269  * Compute
4270  * The compute CP consists of two microengines (ME):
4271  * MEC1 - Compute MicroEngine 1
4272  * MEC2 - Compute MicroEngine 2
4273  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4274  * The queues are exposed to userspace and are programmed directly
4275  * by the compute runtime.
4276  */
4277 /**
4278  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4279  *
4280  * @rdev: radeon_device pointer
4281  * @enable: enable or disable the MEs
4282  *
4283  * Halts or unhalts the gfx MEs.
4284  */
4285 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4286 {
4287 	if (enable)
4288 		WREG32(CP_ME_CNTL, 0);
4289 	else {
4290 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4291 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4292 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4293 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4294 	}
4295 	udelay(50);
4296 }
4297 
4298 /**
4299  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4300  *
4301  * @rdev: radeon_device pointer
4302  *
4303  * Loads the gfx PFP, ME, and CE ucode.
4304  * Returns 0 for success, -EINVAL if the ucode is not available.
4305  */
4306 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4307 {
4308 	int i;
4309 
4310 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4311 		return -EINVAL;
4312 
4313 	cik_cp_gfx_enable(rdev, false);
4314 
4315 	if (rdev->new_fw) {
4316 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4317 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4318 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4319 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4320 		const struct gfx_firmware_header_v1_0 *me_hdr =
4321 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4322 		const __le32 *fw_data;
4323 		u32 fw_size;
4324 
4325 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4326 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4327 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4328 
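		/* Each *_UCODE_DATA write auto-increments the load address;
		 * the trailing write of ucode_version to *_UCODE_ADDR
		 * appears to latch the loaded firmware version.
		 */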
4329 		/* PFP */
4330 		fw_data = (const __le32 *)
4331 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4332 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4333 		WREG32(CP_PFP_UCODE_ADDR, 0);
4334 		for (i = 0; i < fw_size; i++)
4335 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4336 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4337 
4338 		/* CE */
4339 		fw_data = (const __le32 *)
4340 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4341 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4342 		WREG32(CP_CE_UCODE_ADDR, 0);
4343 		for (i = 0; i < fw_size; i++)
4344 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4345 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4346 
4347 		/* ME */
4348 		fw_data = (const __le32 *)
4349 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4350 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4351 		WREG32(CP_ME_RAM_WADDR, 0);
4352 		for (i = 0; i < fw_size; i++)
4353 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4354 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4355 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4356 	} else {
4357 		const __be32 *fw_data;
4358 
4359 		/* PFP */
4360 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4361 		WREG32(CP_PFP_UCODE_ADDR, 0);
4362 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4363 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4364 		WREG32(CP_PFP_UCODE_ADDR, 0);
4365 
4366 		/* CE */
4367 		fw_data = (const __be32 *)rdev->ce_fw->data;
4368 		WREG32(CP_CE_UCODE_ADDR, 0);
4369 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4370 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4371 		WREG32(CP_CE_UCODE_ADDR, 0);
4372 
4373 		/* ME */
4374 		fw_data = (const __be32 *)rdev->me_fw->data;
4375 		WREG32(CP_ME_RAM_WADDR, 0);
4376 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4377 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4378 		WREG32(CP_ME_RAM_WADDR, 0);
4379 	}
4380 
4381 	return 0;
4382 }
4383 
4384 /**
4385  * cik_cp_gfx_start - start the gfx ring
4386  *
4387  * @rdev: radeon_device pointer
4388  *
4389  * Enables the ring and loads the clear state context and other
4390  * packets required to init the ring.
4391  * Returns 0 for success, error for failure.
4392  */
4393 static int cik_cp_gfx_start(struct radeon_device *rdev)
4394 {
4395 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4396 	int r, i;
4397 
4398 	/* init the CP */
4399 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4400 	WREG32(CP_ENDIAN_SWAP, 0);
4401 	WREG32(CP_DEVICE_ID, 1);
4402 
4403 	cik_cp_gfx_enable(rdev, true);
4404 
4405 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4406 	if (r) {
4407 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4408 		return r;
4409 	}
4410 
4411 	/* init the CE partitions.  CE only used for gfx on CIK */
4412 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4413 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4414 	radeon_ring_write(ring, 0x8000);
4415 	radeon_ring_write(ring, 0x8000);
4416 
4417 	/* setup clear context state */
4418 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4419 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4420 
4421 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4422 	radeon_ring_write(ring, 0x80000000);
4423 	radeon_ring_write(ring, 0x80000000);
4424 
4425 	for (i = 0; i < cik_default_size; i++)
4426 		radeon_ring_write(ring, cik_default_state[i]);
4427 
4428 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4429 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4430 
4431 	/* set clear context state */
4432 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4433 	radeon_ring_write(ring, 0);
4434 
4435 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4436 	radeon_ring_write(ring, 0x00000316);
4437 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4438 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4439 
4440 	radeon_ring_unlock_commit(rdev, ring, false);
4441 
4442 	return 0;
4443 }
4444 
4445 /**
4446  * cik_cp_gfx_fini - stop the gfx ring
4447  *
4448  * @rdev: radeon_device pointer
4449  *
4450  * Stop the gfx ring and tear down the driver ring
4451  * info.
4452  */
4453 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4454 {
4455 	cik_cp_gfx_enable(rdev, false);
4456 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4457 }
4458 
4459 /**
4460  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4461  *
4462  * @rdev: radeon_device pointer
4463  *
4464  * Program the location and size of the gfx ring buffer
4465  * and test it to make sure it's working.
4466  * Returns 0 for success, error for failure.
4467  */
4468 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4469 {
4470 	struct radeon_ring *ring;
4471 	u32 tmp;
4472 	u32 rb_bufsz;
4473 	u64 rb_addr;
4474 	int r;
4475 
4476 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4477 	if (rdev->family != CHIP_HAWAII)
4478 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4479 
4480 	/* Set the write pointer delay */
4481 	WREG32(CP_RB_WPTR_DELAY, 0);
4482 
4483 	/* set the RB to use vmid 0 */
4484 	WREG32(CP_RB_VMID, 0);
4485 
4486 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4487 
4488 	/* ring 0 - compute and gfx */
4489 	/* Set ring buffer size */
4490 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
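	/* RB_BUFSZ (low bits) is log2 of the ring size in 8-byte quadwords;
	 * the field at bits 8+ appears to set the rptr writeback block
	 * size.
	 */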
4491 	rb_bufsz = order_base_2(ring->ring_size / 8);
4492 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4493 #ifdef __BIG_ENDIAN
4494 	tmp |= BUF_SWAP_32BIT;
4495 #endif
4496 	WREG32(CP_RB0_CNTL, tmp);
4497 
4498 	/* Initialize the ring buffer's read and write pointers */
4499 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4500 	ring->wptr = 0;
4501 	WREG32(CP_RB0_WPTR, ring->wptr);
4502 
4503 	/* set the wb address whether it's enabled or not */
4504 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4505 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4506 
4507 	/* scratch register shadowing is no longer supported */
4508 	WREG32(SCRATCH_UMSK, 0);
4509 
4510 	if (!rdev->wb.enabled)
4511 		tmp |= RB_NO_UPDATE;
4512 
4513 	mdelay(1);
4514 	WREG32(CP_RB0_CNTL, tmp);
4515 
4516 	rb_addr = ring->gpu_addr >> 8;
4517 	WREG32(CP_RB0_BASE, rb_addr);
4518 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4519 
4520 	/* start the ring */
4521 	cik_cp_gfx_start(rdev);
4522 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4523 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4524 	if (r) {
4525 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4526 		return r;
4527 	}
4528 
4529 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4530 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4531 
4532 	return 0;
4533 }
4534 
4535 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4536 		     struct radeon_ring *ring)
4537 {
4538 	u32 rptr;
4539 
4540 	if (rdev->wb.enabled)
4541 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4542 	else
4543 		rptr = RREG32(CP_RB0_RPTR);
4544 
4545 	return rptr;
4546 }
4547 
4548 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4549 		     struct radeon_ring *ring)
4550 {
4551 	u32 wptr;
4552 
4553 	wptr = RREG32(CP_RB0_WPTR);
4554 
4555 	return wptr;
4556 }
4557 
4558 void cik_gfx_set_wptr(struct radeon_device *rdev,
4559 		      struct radeon_ring *ring)
4560 {
4561 	WREG32(CP_RB0_WPTR, ring->wptr);
4562 	(void)RREG32(CP_RB0_WPTR);
4563 }
4564 
4565 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4566 			 struct radeon_ring *ring)
4567 {
4568 	u32 rptr;
4569 
4570 	if (rdev->wb.enabled) {
4571 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4572 	} else {
4573 		mutex_lock(&rdev->srbm_mutex);
4574 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4575 		rptr = RREG32(CP_HQD_PQ_RPTR);
4576 		cik_srbm_select(rdev, 0, 0, 0, 0);
4577 		mutex_unlock(&rdev->srbm_mutex);
4578 	}
4579 
4580 	return rptr;
4581 }
4582 
4583 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4584 			 struct radeon_ring *ring)
4585 {
4586 	u32 wptr;
4587 
4588 	if (rdev->wb.enabled) {
4589 		/* XXX check if swapping is necessary on BE */
4590 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4591 	} else {
4592 		mutex_lock(&rdev->srbm_mutex);
4593 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4594 		wptr = RREG32(CP_HQD_PQ_WPTR);
4595 		cik_srbm_select(rdev, 0, 0, 0, 0);
4596 		mutex_unlock(&rdev->srbm_mutex);
4597 	}
4598 
4599 	return wptr;
4600 }
4601 
4602 void cik_compute_set_wptr(struct radeon_device *rdev,
4603 			  struct radeon_ring *ring)
4604 {
4605 	/* XXX check if swapping is necessary on BE */
4606 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4607 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4608 }
4609 
4610 static void cik_compute_stop(struct radeon_device *rdev,
4611 			     struct radeon_ring *ring)
4612 {
4613 	u32 j, tmp;
4614 
4615 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4616 	/* Disable wptr polling. */
4617 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4618 	tmp &= ~WPTR_POLL_EN;
4619 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4620 	/* Disable HQD. */
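	/* Writing 1 to CP_HQD_DEQUEUE_REQUEST asks the HQD to drain; give
	 * it up to usec_timeout iterations to go idle before clearing the
	 * queue pointers.
	 */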
4621 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4622 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4623 		for (j = 0; j < rdev->usec_timeout; j++) {
4624 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4625 				break;
4626 			udelay(1);
4627 		}
4628 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4629 		WREG32(CP_HQD_PQ_RPTR, 0);
4630 		WREG32(CP_HQD_PQ_WPTR, 0);
4631 	}
4632 	cik_srbm_select(rdev, 0, 0, 0, 0);
4633 }
4634 
4635 /**
4636  * cik_cp_compute_enable - enable/disable the compute CP MEs
4637  *
4638  * @rdev: radeon_device pointer
4639  * @enable: enable or disable the MEs
4640  *
4641  * Halts or unhalts the compute MEs.
4642  */
4643 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4644 {
4645 	if (enable)
4646 		WREG32(CP_MEC_CNTL, 0);
4647 	else {
4648 		/*
4649 		 * To make hibernation reliable we need to clear compute ring
4650 		 * configuration before halting the compute ring.
4651 		 */
4652 		mutex_lock(&rdev->srbm_mutex);
4653 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4654 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4655 		mutex_unlock(&rdev->srbm_mutex);
4656 
4657 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4658 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4659 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4660 	}
4661 	udelay(50);
4662 }
4663 
4664 /**
4665  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4666  *
4667  * @rdev: radeon_device pointer
4668  *
4669  * Loads the compute MEC1&2 ucode.
4670  * Returns 0 for success, -EINVAL if the ucode is not available.
4671  */
4672 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4673 {
4674 	int i;
4675 
4676 	if (!rdev->mec_fw)
4677 		return -EINVAL;
4678 
4679 	cik_cp_compute_enable(rdev, false);
4680 
4681 	if (rdev->new_fw) {
4682 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4683 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4684 		const __le32 *fw_data;
4685 		u32 fw_size;
4686 
4687 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4688 
4689 		/* MEC1 */
4690 		fw_data = (const __le32 *)
4691 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4692 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4693 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4694 		for (i = 0; i < fw_size; i++)
4695 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4696 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4697 
4698 		/* MEC2 */
4699 		if (rdev->family == CHIP_KAVERI) {
4700 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4701 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4702 
4703 			fw_data = (const __le32 *)
4704 				(rdev->mec2_fw->data +
4705 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4706 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4707 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4708 			for (i = 0; i < fw_size; i++)
4709 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4710 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4711 		}
4712 	} else {
4713 		const __be32 *fw_data;
4714 
4715 		/* MEC1 */
4716 		fw_data = (const __be32 *)rdev->mec_fw->data;
4717 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4718 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4719 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4720 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4721 
4722 		if (rdev->family == CHIP_KAVERI) {
4723 			/* MEC2 */
4724 			fw_data = (const __be32 *)rdev->mec_fw->data;
4725 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4726 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4727 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4728 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4729 		}
4730 	}
4731 
4732 	return 0;
4733 }
4734 
4735 /**
4736  * cik_cp_compute_start - start the compute queues
4737  *
4738  * @rdev: radeon_device pointer
4739  *
4740  * Enable the compute queues.
4741  * Returns 0 for success, error for failure.
4742  */
4743 static int cik_cp_compute_start(struct radeon_device *rdev)
4744 {
4745 	cik_cp_compute_enable(rdev, true);
4746 
4747 	return 0;
4748 }
4749 
4750 /**
4751  * cik_cp_compute_fini - stop the compute queues
4752  *
4753  * @rdev: radeon_device pointer
4754  *
4755  * Stop the compute queues and tear down the driver queue
4756  * info.
4757  */
4758 static void cik_cp_compute_fini(struct radeon_device *rdev)
4759 {
4760 	int i, idx, r;
4761 
4762 	cik_cp_compute_enable(rdev, false);
4763 
4764 	for (i = 0; i < 2; i++) {
4765 		if (i == 0)
4766 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4767 		else
4768 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4769 
4770 		if (rdev->ring[idx].mqd_obj) {
4771 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4772 			if (unlikely(r != 0))
4773 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4774 
4775 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4776 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4777 
4778 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4779 			rdev->ring[idx].mqd_obj = NULL;
4780 		}
4781 	}
4782 }
4783 
4784 static void cik_mec_fini(struct radeon_device *rdev)
4785 {
4786 	int r;
4787 
4788 	if (rdev->mec.hpd_eop_obj) {
4789 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4790 		if (unlikely(r != 0))
4791 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4792 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4793 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4794 
4795 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4796 		rdev->mec.hpd_eop_obj = NULL;
4797 	}
4798 }
4799 
4800 #define MEC_HPD_SIZE 2048
4801 
4802 static int cik_mec_init(struct radeon_device *rdev)
4803 {
4804 	int r;
4805 	u32 *hpd;
4806 
4807 	/*
4808 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4809 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4810 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4811 	 * be handled by KFD
4812 	 */
4813 	rdev->mec.num_mec = 1;
4814 	rdev->mec.num_pipe = 1;
4815 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4816 
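	/* Allocate the HPD EOP buffer the MEC uses to track end-of-pipe
	 * events for its queues.
	 */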
4817 	if (rdev->mec.hpd_eop_obj == NULL) {
4818 		r = radeon_bo_create(rdev,
4819 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4820 				     PAGE_SIZE, true,
4821 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4822 				     &rdev->mec.hpd_eop_obj);
4823 		if (r) {
4824 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4825 			return r;
4826 		}
4827 	}
4828 
4829 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4830 	if (unlikely(r != 0)) {
4831 		cik_mec_fini(rdev);
4832 		return r;
4833 	}
4834 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4835 			  &rdev->mec.hpd_eop_gpu_addr);
4836 	if (r) {
4837 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4838 		cik_mec_fini(rdev);
4839 		return r;
4840 	}
4841 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4842 	if (r) {
4843 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4844 		cik_mec_fini(rdev);
4845 		return r;
4846 	}
4847 
4848 	/* clear memory.  Not sure if this is required or not */
4849 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4850 
4851 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4852 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4853 
4854 	return 0;
4855 }
4856 
4857 struct hqd_registers {
4859 	u32 cp_mqd_base_addr;
4860 	u32 cp_mqd_base_addr_hi;
4861 	u32 cp_hqd_active;
4862 	u32 cp_hqd_vmid;
4863 	u32 cp_hqd_persistent_state;
4864 	u32 cp_hqd_pipe_priority;
4865 	u32 cp_hqd_queue_priority;
4866 	u32 cp_hqd_quantum;
4867 	u32 cp_hqd_pq_base;
4868 	u32 cp_hqd_pq_base_hi;
4869 	u32 cp_hqd_pq_rptr;
4870 	u32 cp_hqd_pq_rptr_report_addr;
4871 	u32 cp_hqd_pq_rptr_report_addr_hi;
4872 	u32 cp_hqd_pq_wptr_poll_addr;
4873 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4874 	u32 cp_hqd_pq_doorbell_control;
4875 	u32 cp_hqd_pq_wptr;
4876 	u32 cp_hqd_pq_control;
4877 	u32 cp_hqd_ib_base_addr;
4878 	u32 cp_hqd_ib_base_addr_hi;
4879 	u32 cp_hqd_ib_rptr;
4880 	u32 cp_hqd_ib_control;
4881 	u32 cp_hqd_iq_timer;
4882 	u32 cp_hqd_iq_rptr;
4883 	u32 cp_hqd_dequeue_request;
4884 	u32 cp_hqd_dma_offload;
4885 	u32 cp_hqd_sema_cmd;
4886 	u32 cp_hqd_msg_type;
4887 	u32 cp_hqd_atomic0_preop_lo;
4888 	u32 cp_hqd_atomic0_preop_hi;
4889 	u32 cp_hqd_atomic1_preop_lo;
4890 	u32 cp_hqd_atomic1_preop_hi;
4891 	u32 cp_hqd_hq_scheduler0;
4892 	u32 cp_hqd_hq_scheduler1;
4893 	u32 cp_mqd_control;
4894 };
4895 
4896 struct bonaire_mqd {
4898 	u32 header;
4899 	u32 dispatch_initiator;
4900 	u32 dimensions[3];
4901 	u32 start_idx[3];
4902 	u32 num_threads[3];
4903 	u32 pipeline_stat_enable;
4904 	u32 perf_counter_enable;
4905 	u32 pgm[2];
4906 	u32 tba[2];
4907 	u32 tma[2];
4908 	u32 pgm_rsrc[2];
4909 	u32 vmid;
4910 	u32 resource_limits;
4911 	u32 static_thread_mgmt01[2];
4912 	u32 tmp_ring_size;
4913 	u32 static_thread_mgmt23[2];
4914 	u32 restart[3];
4915 	u32 thread_trace_enable;
4916 	u32 reserved1;
4917 	u32 user_data[16];
4918 	u32 vgtcs_invoke_count[2];
4919 	struct hqd_registers queue_state;
4920 	u32 dequeue_cntr;
4921 	u32 interrupt_queue[64];
4922 };
4923 
4924 /**
4925  * cik_cp_compute_resume - setup the compute queue registers
4926  *
4927  * @rdev: radeon_device pointer
4928  *
4929  * Program the compute queues and test them to make sure they
4930  * are working.
4931  * Returns 0 for success, error for failure.
4932  */
4933 static int cik_cp_compute_resume(struct radeon_device *rdev)
4934 {
4935 	int r, i, j, idx;
4936 	u32 tmp;
4937 	bool use_doorbell = true;
4938 	u64 hqd_gpu_addr;
4939 	u64 mqd_gpu_addr;
4940 	u64 eop_gpu_addr;
4941 	u64 wb_gpu_addr;
4942 	u32 *buf;
4943 	struct bonaire_mqd *mqd;
4944 
4945 	r = cik_cp_compute_start(rdev);
4946 	if (r)
4947 		return r;
4948 
4949 	/* fix up chicken bits */
4950 	tmp = RREG32(CP_CPF_DEBUG);
4951 	tmp |= (1 << 23);
4952 	WREG32(CP_CPF_DEBUG, tmp);
4953 
4954 	/* init the pipes */
4955 	mutex_lock(&rdev->srbm_mutex);
4956 
4957 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4958 
4959 	cik_srbm_select(rdev, 0, 0, 0, 0);
4960 
4961 	/* write the EOP addr */
4962 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4963 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4964 
4965 	/* set the VMID assigned */
4966 	WREG32(CP_HPD_EOP_VMID, 0);
4967 
4968 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4969 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4970 	tmp &= ~EOP_SIZE_MASK;
4971 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4972 	WREG32(CP_HPD_EOP_CONTROL, tmp);
4973 
4974 	mutex_unlock(&rdev->srbm_mutex);
4975 
4976 	/* init the queues.  Just two for now. */
4977 	for (i = 0; i < 2; i++) {
4978 		if (i == 0)
4979 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4980 		else
4981 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4982 
4983 		if (rdev->ring[idx].mqd_obj == NULL) {
4984 			r = radeon_bo_create(rdev,
4985 					     sizeof(struct bonaire_mqd),
4986 					     PAGE_SIZE, true,
4987 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4988 					     NULL, &rdev->ring[idx].mqd_obj);
4989 			if (r) {
4990 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4991 				return r;
4992 			}
4993 		}
4994 
4995 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4996 		if (unlikely(r != 0)) {
4997 			cik_cp_compute_fini(rdev);
4998 			return r;
4999 		}
5000 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
5001 				  &mqd_gpu_addr);
5002 		if (r) {
5003 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
5004 			cik_cp_compute_fini(rdev);
5005 			return r;
5006 		}
5007 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
5008 		if (r) {
5009 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
5010 			cik_cp_compute_fini(rdev);
5011 			return r;
5012 		}
5013 
5014 		/* init the mqd struct */
5015 		memset(buf, 0, sizeof(struct bonaire_mqd));
5016 
5017 		mqd = (struct bonaire_mqd *)buf;
5018 		mqd->header = 0xC0310800;
5019 		mqd->static_thread_mgmt01[0] = 0xffffffff;
5020 		mqd->static_thread_mgmt01[1] = 0xffffffff;
5021 		mqd->static_thread_mgmt23[0] = 0xffffffff;
5022 		mqd->static_thread_mgmt23[1] = 0xffffffff;
5023 
5024 		mutex_lock(&rdev->srbm_mutex);
5025 		cik_srbm_select(rdev, rdev->ring[idx].me,
5026 				rdev->ring[idx].pipe,
5027 				rdev->ring[idx].queue, 0);
5028 
5029 		/* disable wptr polling */
5030 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
5031 		tmp &= ~WPTR_POLL_EN;
5032 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
5033 
5034 		/* enable doorbell? */
5035 		mqd->queue_state.cp_hqd_pq_doorbell_control =
5036 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5037 		if (use_doorbell)
5038 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5039 		else
5040 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
5041 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5042 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5043 
5044 		/* disable the queue if it's active */
5045 		mqd->queue_state.cp_hqd_dequeue_request = 0;
5046 		mqd->queue_state.cp_hqd_pq_rptr = 0;
5047 		mqd->queue_state.cp_hqd_pq_wptr = 0;
5048 		if (RREG32(CP_HQD_ACTIVE) & 1) {
5049 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
5050 			for (j = 0; j < rdev->usec_timeout; j++) {
5051 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
5052 					break;
5053 				udelay(1);
5054 			}
5055 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
5056 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
5057 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5058 		}
5059 
5060 		/* set the pointer to the MQD */
5061 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5062 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5063 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5064 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5065 		/* set MQD vmid to 0 */
5066 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5067 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5068 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5069 
5070 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5071 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5072 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5073 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5074 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5075 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5076 
5077 		/* set up the HQD, this is similar to CP_RB0_CNTL */
5078 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5079 		mqd->queue_state.cp_hqd_pq_control &=
5080 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5081 
5082 		mqd->queue_state.cp_hqd_pq_control |=
5083 			order_base_2(rdev->ring[idx].ring_size / 8);
5084 		mqd->queue_state.cp_hqd_pq_control |=
5085 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5086 #ifdef __BIG_ENDIAN
5087 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5088 #endif
5089 		mqd->queue_state.cp_hqd_pq_control &=
5090 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5091 		mqd->queue_state.cp_hqd_pq_control |=
5092 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5093 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5094 
5095 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5096 		if (i == 0)
5097 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5098 		else
5099 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5100 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5101 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5102 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5103 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5104 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5105 
5106 		/* set the wb address whether it's enabled or not */
5107 		if (i == 0)
5108 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5109 		else
5110 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5111 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5112 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5113 			upper_32_bits(wb_gpu_addr) & 0xffff;
5114 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5115 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5116 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5117 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5118 
5119 		/* enable the doorbell if requested */
5120 		if (use_doorbell) {
5121 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5122 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5123 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5124 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5125 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5126 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5127 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5128 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5129 
5130 		} else {
5131 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5132 		}
5133 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5134 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5135 
5136 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5137 		rdev->ring[idx].wptr = 0;
5138 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5139 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5140 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5141 
5142 		/* set the vmid for the queue */
5143 		mqd->queue_state.cp_hqd_vmid = 0;
5144 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5145 
5146 		/* activate the queue */
5147 		mqd->queue_state.cp_hqd_active = 1;
5148 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5149 
5150 		cik_srbm_select(rdev, 0, 0, 0, 0);
5151 		mutex_unlock(&rdev->srbm_mutex);
5152 
5153 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5154 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5155 
5156 		rdev->ring[idx].ready = true;
5157 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5158 		if (r)
5159 			rdev->ring[idx].ready = false;
5160 	}
5161 
5162 	return 0;
5163 }
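
/*
 * Worked example for the QUEUE_SIZE field programmed above:
 * CP_HQD_PQ_CONTROL takes order_base_2(ring_size / 8), so a 1 MiB ring
 * encodes as order_base_2(1048576 / 8) = order_base_2(131072) = 17,
 * and the 4 KiB rptr block encodes as order_base_2(4096 / 8) = 9 in
 * the RPTR_BLOCK_SIZE field.  The same arithmetic as a hypothetical
 * helper (illustrative only):
 */
#if 0
static u32 cik_hqd_pq_size_bits(unsigned ring_size_bytes)
{
	return order_base_2(ring_size_bytes / 8) |
	       (order_base_2(RADEON_GPU_PAGE_SIZE / 8) << 8);
}
#endif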
5164 
5165 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5166 {
5167 	cik_cp_gfx_enable(rdev, enable);
5168 	cik_cp_compute_enable(rdev, enable);
5169 }
5170 
5171 static int cik_cp_load_microcode(struct radeon_device *rdev)
5172 {
5173 	int r;
5174 
5175 	r = cik_cp_gfx_load_microcode(rdev);
5176 	if (r)
5177 		return r;
5178 	r = cik_cp_compute_load_microcode(rdev);
5179 	if (r)
5180 		return r;
5181 
5182 	return 0;
5183 }
5184 
5185 static void cik_cp_fini(struct radeon_device *rdev)
5186 {
5187 	cik_cp_gfx_fini(rdev);
5188 	cik_cp_compute_fini(rdev);
5189 }
5190 
5191 static int cik_cp_resume(struct radeon_device *rdev)
5192 {
5193 	int r;
5194 
5195 	cik_enable_gui_idle_interrupt(rdev, false);
5196 
5197 	r = cik_cp_load_microcode(rdev);
5198 	if (r)
5199 		return r;
5200 
5201 	r = cik_cp_gfx_resume(rdev);
5202 	if (r)
5203 		return r;
5204 	r = cik_cp_compute_resume(rdev);
5205 	if (r)
5206 		return r;
5207 
5208 	cik_enable_gui_idle_interrupt(rdev, true);
5209 
5210 	return 0;
5211 }
5212 
5213 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5214 {
5215 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5216 		RREG32(GRBM_STATUS));
5217 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5218 		RREG32(GRBM_STATUS2));
5219 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5220 		RREG32(GRBM_STATUS_SE0));
5221 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5222 		RREG32(GRBM_STATUS_SE1));
5223 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5224 		RREG32(GRBM_STATUS_SE2));
5225 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5226 		RREG32(GRBM_STATUS_SE3));
5227 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5228 		RREG32(SRBM_STATUS));
5229 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5230 		RREG32(SRBM_STATUS2));
5231 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5232 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5233 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5234 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5235 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5236 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5237 		 RREG32(CP_STALLED_STAT1));
5238 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5239 		 RREG32(CP_STALLED_STAT2));
5240 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5241 		 RREG32(CP_STALLED_STAT3));
5242 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5243 		 RREG32(CP_CPF_BUSY_STAT));
5244 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5245 		 RREG32(CP_CPF_STALLED_STAT1));
5246 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5247 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5248 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5249 		 RREG32(CP_CPC_STALLED_STAT1));
5250 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5251 }
5252 
5253 /**
5254  * cik_gpu_check_soft_reset - check which blocks are busy
5255  *
5256  * @rdev: radeon_device pointer
5257  *
5258  * Check which blocks are busy and return the relevant reset
5259  * mask to be used by cik_gpu_soft_reset().
5260  * Returns a mask of the blocks to be reset.
5261  */
5262 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5263 {
5264 	u32 reset_mask = 0;
5265 	u32 tmp;
5266 
5267 	/* GRBM_STATUS */
5268 	tmp = RREG32(GRBM_STATUS);
5269 	if (tmp & (PA_BUSY | SC_BUSY |
5270 		   BCI_BUSY | SX_BUSY |
5271 		   TA_BUSY | VGT_BUSY |
5272 		   DB_BUSY | CB_BUSY |
5273 		   GDS_BUSY | SPI_BUSY |
5274 		   IA_BUSY | IA_BUSY_NO_DMA))
5275 		reset_mask |= RADEON_RESET_GFX;
5276 
5277 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5278 		reset_mask |= RADEON_RESET_CP;
5279 
5280 	/* GRBM_STATUS2 */
5281 	tmp = RREG32(GRBM_STATUS2);
5282 	if (tmp & RLC_BUSY)
5283 		reset_mask |= RADEON_RESET_RLC;
5284 
5285 	/* SDMA0_STATUS_REG */
5286 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5287 	if (!(tmp & SDMA_IDLE))
5288 		reset_mask |= RADEON_RESET_DMA;
5289 
5290 	/* SDMA1_STATUS_REG */
5291 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5292 	if (!(tmp & SDMA_IDLE))
5293 		reset_mask |= RADEON_RESET_DMA1;
5294 
5295 	/* SRBM_STATUS2 */
5296 	tmp = RREG32(SRBM_STATUS2);
5297 	if (tmp & SDMA_BUSY)
5298 		reset_mask |= RADEON_RESET_DMA;
5299 
5300 	if (tmp & SDMA1_BUSY)
5301 		reset_mask |= RADEON_RESET_DMA1;
5302 
5303 	/* SRBM_STATUS */
5304 	tmp = RREG32(SRBM_STATUS);
5305 
5306 	if (tmp & IH_BUSY)
5307 		reset_mask |= RADEON_RESET_IH;
5308 
5309 	if (tmp & SEM_BUSY)
5310 		reset_mask |= RADEON_RESET_SEM;
5311 
5312 	if (tmp & GRBM_RQ_PENDING)
5313 		reset_mask |= RADEON_RESET_GRBM;
5314 
5315 	if (tmp & VMC_BUSY)
5316 		reset_mask |= RADEON_RESET_VMC;
5317 
5318 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5319 		   MCC_BUSY | MCD_BUSY))
5320 		reset_mask |= RADEON_RESET_MC;
5321 
5322 	if (evergreen_is_display_hung(rdev))
5323 		reset_mask |= RADEON_RESET_DISPLAY;
5324 
5325 	/* Skip MC reset as it's most likely not hung, just busy */
5326 	if (reset_mask & RADEON_RESET_MC) {
5327 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5328 		reset_mask &= ~RADEON_RESET_MC;
5329 	}
5330 
5331 	return reset_mask;
5332 }
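
/*
 * A hypothetical debug helper (illustrative only) decoding the mask
 * returned above into the block names used in this file:
 */
#if 0
static void cik_print_reset_mask(struct radeon_device *rdev, u32 reset_mask)
{
	if (reset_mask & RADEON_RESET_GFX)
		dev_info(rdev->dev, "  GFX busy\n");
	if (reset_mask & RADEON_RESET_CP)
		dev_info(rdev->dev, "  CP busy\n");
	if (reset_mask & RADEON_RESET_RLC)
		dev_info(rdev->dev, "  RLC busy\n");
	if (reset_mask & RADEON_RESET_DMA)
		dev_info(rdev->dev, "  SDMA0 busy\n");
	if (reset_mask & RADEON_RESET_DMA1)
		dev_info(rdev->dev, "  SDMA1 busy\n");
	if (reset_mask & RADEON_RESET_DISPLAY)
		dev_info(rdev->dev, "  display hung\n");
}
#endif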
5333 
5334 /**
5335  * cik_gpu_soft_reset - soft reset GPU
5336  *
5337  * @rdev: radeon_device pointer
5338  * @reset_mask: mask of which blocks to reset
5339  *
5340  * Soft reset the blocks specified in @reset_mask.
5341  */
5342 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5343 {
5344 	struct evergreen_mc_save save;
5345 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5346 	u32 tmp;
5347 
5348 	if (reset_mask == 0)
5349 		return;
5350 
5351 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5352 
5353 	cik_print_gpu_status_regs(rdev);
5354 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5355 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5356 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5357 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5358 
5359 	/* disable CG/PG */
5360 	cik_fini_pg(rdev);
5361 	cik_fini_cg(rdev);
5362 
5363 	/* stop the rlc */
5364 	cik_rlc_stop(rdev);
5365 
5366 	/* Disable GFX parsing/prefetching */
5367 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5368 
5369 	/* Disable MEC parsing/prefetching */
5370 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5371 
5372 	if (reset_mask & RADEON_RESET_DMA) {
5373 		/* sdma0 */
5374 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5375 		tmp |= SDMA_HALT;
5376 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5377 	}
5378 	if (reset_mask & RADEON_RESET_DMA1) {
5379 		/* sdma1 */
5380 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5381 		tmp |= SDMA_HALT;
5382 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5383 	}
5384 
5385 	evergreen_mc_stop(rdev, &save);
5386 	if (evergreen_mc_wait_for_idle(rdev)) {
5387 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5388 	}
5389 
5390 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5391 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5392 
5393 	if (reset_mask & RADEON_RESET_CP) {
5394 		grbm_soft_reset |= SOFT_RESET_CP;
5395 
5396 		srbm_soft_reset |= SOFT_RESET_GRBM;
5397 	}
5398 
5399 	if (reset_mask & RADEON_RESET_DMA)
5400 		srbm_soft_reset |= SOFT_RESET_SDMA;
5401 
5402 	if (reset_mask & RADEON_RESET_DMA1)
5403 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5404 
5405 	if (reset_mask & RADEON_RESET_DISPLAY)
5406 		srbm_soft_reset |= SOFT_RESET_DC;
5407 
5408 	if (reset_mask & RADEON_RESET_RLC)
5409 		grbm_soft_reset |= SOFT_RESET_RLC;
5410 
5411 	if (reset_mask & RADEON_RESET_SEM)
5412 		srbm_soft_reset |= SOFT_RESET_SEM;
5413 
5414 	if (reset_mask & RADEON_RESET_IH)
5415 		srbm_soft_reset |= SOFT_RESET_IH;
5416 
5417 	if (reset_mask & RADEON_RESET_GRBM)
5418 		srbm_soft_reset |= SOFT_RESET_GRBM;
5419 
5420 	if (reset_mask & RADEON_RESET_VMC)
5421 		srbm_soft_reset |= SOFT_RESET_VMC;
5422 
5423 	if (!(rdev->flags & RADEON_IS_IGP)) {
5424 		if (reset_mask & RADEON_RESET_MC)
5425 			srbm_soft_reset |= SOFT_RESET_MC;
5426 	}
5427 
5428 	if (grbm_soft_reset) {
5429 		tmp = RREG32(GRBM_SOFT_RESET);
5430 		tmp |= grbm_soft_reset;
5431 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5432 		WREG32(GRBM_SOFT_RESET, tmp);
5433 		tmp = RREG32(GRBM_SOFT_RESET);
5434 
5435 		udelay(50);
5436 
5437 		tmp &= ~grbm_soft_reset;
5438 		WREG32(GRBM_SOFT_RESET, tmp);
5439 		tmp = RREG32(GRBM_SOFT_RESET);
5440 	}
5441 
5442 	if (srbm_soft_reset) {
5443 		tmp = RREG32(SRBM_SOFT_RESET);
5444 		tmp |= srbm_soft_reset;
5445 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5446 		WREG32(SRBM_SOFT_RESET, tmp);
5447 		tmp = RREG32(SRBM_SOFT_RESET);
5448 
5449 		udelay(50);
5450 
5451 		tmp &= ~srbm_soft_reset;
5452 		WREG32(SRBM_SOFT_RESET, tmp);
5453 		tmp = RREG32(SRBM_SOFT_RESET);
5454 	}
5455 
5456 	/* Wait a little for things to settle down */
5457 	udelay(50);
5458 
5459 	evergreen_mc_resume(rdev, &save);
5460 	udelay(50);
5461 
5462 	cik_print_gpu_status_regs(rdev);
5463 }
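
/*
 * Both soft-reset blocks above follow the same pulse shape: OR the
 * reset bits in, read back to post the write, wait ~50us, clear the
 * bits, read back again.  A hypothetical helper capturing that pattern
 * (illustrative only):
 */
#if 0
static void cik_pulse_soft_reset(struct radeon_device *rdev,
				 u32 reg, u32 bits)
{
	u32 tmp = RREG32(reg);

	tmp |= bits;
	WREG32(reg, tmp);
	RREG32(reg);		/* read back to post the write */
	udelay(50);
	tmp &= ~bits;
	WREG32(reg, tmp);
	RREG32(reg);
}
#endif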
5464 
5465 struct kv_reset_save_regs {
5466 	u32 gmcon_reng_execute;
5467 	u32 gmcon_misc;
5468 	u32 gmcon_misc3;
5469 };
5470 
5471 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5472 				   struct kv_reset_save_regs *save)
5473 {
5474 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5475 	save->gmcon_misc = RREG32(GMCON_MISC);
5476 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5477 
5478 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5479 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5480 						STCTRL_STUTTER_EN));
5481 }
5482 
5483 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5484 				      struct kv_reset_save_regs *save)
5485 {
5486 	int i;
5487 
5488 	WREG32(GMCON_PGFSM_WRITE, 0);
5489 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5490 
5491 	for (i = 0; i < 5; i++)
5492 		WREG32(GMCON_PGFSM_WRITE, 0);
5493 
5494 	WREG32(GMCON_PGFSM_WRITE, 0);
5495 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5496 
5497 	for (i = 0; i < 5; i++)
5498 		WREG32(GMCON_PGFSM_WRITE, 0);
5499 
5500 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5501 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5502 
5503 	for (i = 0; i < 5; i++)
5504 		WREG32(GMCON_PGFSM_WRITE, 0);
5505 
5506 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5507 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5508 
5509 	for (i = 0; i < 5; i++)
5510 		WREG32(GMCON_PGFSM_WRITE, 0);
5511 
5512 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5513 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5514 
5515 	for (i = 0; i < 5; i++)
5516 		WREG32(GMCON_PGFSM_WRITE, 0);
5517 
5518 	WREG32(GMCON_PGFSM_WRITE, 0);
5519 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5520 
5521 	for (i = 0; i < 5; i++)
5522 		WREG32(GMCON_PGFSM_WRITE, 0);
5523 
5524 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5525 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5526 
5527 	for (i = 0; i < 5; i++)
5528 		WREG32(GMCON_PGFSM_WRITE, 0);
5529 
5530 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5531 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5532 
5533 	for (i = 0; i < 5; i++)
5534 		WREG32(GMCON_PGFSM_WRITE, 0);
5535 
5536 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5537 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5538 
5539 	for (i = 0; i < 5; i++)
5540 		WREG32(GMCON_PGFSM_WRITE, 0);
5541 
5542 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5543 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5544 
5545 	for (i = 0; i < 5; i++)
5546 		WREG32(GMCON_PGFSM_WRITE, 0);
5547 
5548 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5549 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5550 
5551 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5552 	WREG32(GMCON_MISC, save->gmcon_misc);
5553 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5554 }
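
/*
 * Most steps of the PGFSM restore above share one shape: program a
 * write value, select a config, then strobe five zero writes.  As a
 * hypothetical wrapper (illustrative only):
 */
#if 0
static void kv_pgfsm_step(struct radeon_device *rdev,
			  u32 write_val, u32 config)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, write_val);
	WREG32(GMCON_PGFSM_CONFIG, config);
	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);
}
#endif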
5555 
5556 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5557 {
5558 	struct evergreen_mc_save save;
5559 	struct kv_reset_save_regs kv_save = { 0 };
5560 	u32 tmp, i;
5561 
5562 	dev_info(rdev->dev, "GPU pci config reset\n");
5563 
5564 	/* disable dpm? */
5565 
5566 	/* disable cg/pg */
5567 	cik_fini_pg(rdev);
5568 	cik_fini_cg(rdev);
5569 
5570 	/* Disable GFX parsing/prefetching */
5571 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5572 
5573 	/* Disable MEC parsing/prefetching */
5574 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5575 
5576 	/* sdma0 */
5577 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5578 	tmp |= SDMA_HALT;
5579 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5580 	/* sdma1 */
5581 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5582 	tmp |= SDMA_HALT;
5583 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5584 	/* XXX other engines? */
5585 
5586 	/* halt the rlc, disable cp internal ints */
5587 	cik_rlc_stop(rdev);
5588 
5589 	udelay(50);
5590 
5591 	/* disable mem access */
5592 	evergreen_mc_stop(rdev, &save);
5593 	if (evergreen_mc_wait_for_idle(rdev)) {
5594 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5595 	}
5596 
5597 	if (rdev->flags & RADEON_IS_IGP)
5598 		kv_save_regs_for_reset(rdev, &kv_save);
5599 
5600 	/* disable BM */
5601 	pci_clear_master(rdev->pdev);
5602 	/* reset */
5603 	radeon_pci_config_reset(rdev);
5604 
5605 	udelay(100);
5606 
5607 	/* wait for asic to come out of reset */
5608 	for (i = 0; i < rdev->usec_timeout; i++) {
5609 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5610 			break;
5611 		udelay(1);
5612 	}
5613 
5614 	/* XXX: does asic init need to be run first? */
5615 	if (rdev->flags & RADEON_IS_IGP)
5616 		kv_restore_regs_for_reset(rdev, &kv_save);
5617 }
5618 
5619 /**
5620  * cik_asic_reset - soft reset GPU
5621  *
5622  * @rdev: radeon_device pointer
5623  *
5624  * Look up which blocks are hung and attempt
5625  * to reset them.
5626  * Returns 0 for success.
5627  */
5628 int cik_asic_reset(struct radeon_device *rdev)
5629 {
5630 	u32 reset_mask;
5631 
5632 	reset_mask = cik_gpu_check_soft_reset(rdev);
5633 
5634 	if (reset_mask)
5635 		r600_set_bios_scratch_engine_hung(rdev, true);
5636 
5637 	/* try soft reset */
5638 	cik_gpu_soft_reset(rdev, reset_mask);
5639 
5640 	reset_mask = cik_gpu_check_soft_reset(rdev);
5641 
5642 	/* try pci config reset */
5643 	if (reset_mask && radeon_hard_reset)
5644 		cik_gpu_pci_config_reset(rdev);
5645 
5646 	reset_mask = cik_gpu_check_soft_reset(rdev);
5647 
5648 	if (!reset_mask)
5649 		r600_set_bios_scratch_engine_hung(rdev, false);
5650 
5651 	return 0;
5652 }
5653 
5654 /**
5655  * cik_gfx_is_lockup - check if the 3D engine is locked up
5656  *
5657  * @rdev: radeon_device pointer
5658  * @ring: radeon_ring structure holding ring information
5659  *
5660  * Check if the 3D engine is locked up (CIK).
5661  * Returns true if the engine is locked, false if not.
5662  */
5663 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5664 {
5665 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5666 
5667 	if (!(reset_mask & (RADEON_RESET_GFX |
5668 			    RADEON_RESET_COMPUTE |
5669 			    RADEON_RESET_CP))) {
5670 		radeon_ring_lockup_update(rdev, ring);
5671 		return false;
5672 	}
5673 	return radeon_ring_test_lockup(rdev, ring);
5674 }
5675 
5676 /* MC */
5677 /**
5678  * cik_mc_program - program the GPU memory controller
5679  *
5680  * @rdev: radeon_device pointer
5681  *
5682  * Set the location of vram, gart, and AGP in the GPU's
5683  * physical address space (CIK).
5684  */
5685 static void cik_mc_program(struct radeon_device *rdev)
5686 {
5687 	struct evergreen_mc_save save;
5688 	u32 tmp;
5689 	int i, j;
5690 
5691 	/* Initialize HDP */
5692 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5693 		WREG32((0x2c14 + j), 0x00000000);
5694 		WREG32((0x2c18 + j), 0x00000000);
5695 		WREG32((0x2c1c + j), 0x00000000);
5696 		WREG32((0x2c20 + j), 0x00000000);
5697 		WREG32((0x2c24 + j), 0x00000000);
5698 	}
5699 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5700 
5701 	evergreen_mc_stop(rdev, &save);
5702 	if (radeon_mc_wait_for_idle(rdev)) {
5703 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5704 	}
5705 	/* Lockout access through VGA aperture*/
5706 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5707 	/* Update configuration */
5708 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5709 	       rdev->mc.vram_start >> 12);
5710 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5711 	       rdev->mc.vram_end >> 12);
5712 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5713 	       rdev->vram_scratch.gpu_addr >> 12);
5714 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5715 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5716 	WREG32(MC_VM_FB_LOCATION, tmp);
5717 	/* XXX double check these! */
5718 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5719 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5720 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5721 	WREG32(MC_VM_AGP_BASE, 0);
5722 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5723 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5724 	if (radeon_mc_wait_for_idle(rdev)) {
5725 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5726 	}
5727 	evergreen_mc_resume(rdev, &save);
5728 	/* we need to own VRAM, so turn off the VGA renderer here
5729 	 * to stop it overwriting our objects */
5730 	rv515_vga_render_disable(rdev);
5731 }
5732 
5733 /**
5734  * cik_mc_init - initialize the memory controller driver params
5735  *
5736  * @rdev: radeon_device pointer
5737  *
5738  * Look up the amount of vram, vram width, and decide how to place
5739  * vram and gart within the GPU's physical address space (CIK).
5740  * Returns 0 for success.
5741  */
5742 static int cik_mc_init(struct radeon_device *rdev)
5743 {
5744 	u32 tmp;
5745 	int chansize, numchan;
5746 
5747 	/* Get VRAM information */
5748 	rdev->mc.vram_is_ddr = true;
5749 	tmp = RREG32(MC_ARB_RAMCFG);
5750 	if (tmp & CHANSIZE_MASK) {
5751 		chansize = 64;
5752 	} else {
5753 		chansize = 32;
5754 	}
5755 	tmp = RREG32(MC_SHARED_CHMAP);
5756 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5757 	case 0:
5758 	default:
5759 		numchan = 1;
5760 		break;
5761 	case 1:
5762 		numchan = 2;
5763 		break;
5764 	case 2:
5765 		numchan = 4;
5766 		break;
5767 	case 3:
5768 		numchan = 8;
5769 		break;
5770 	case 4:
5771 		numchan = 3;
5772 		break;
5773 	case 5:
5774 		numchan = 6;
5775 		break;
5776 	case 6:
5777 		numchan = 10;
5778 		break;
5779 	case 7:
5780 		numchan = 12;
5781 		break;
5782 	case 8:
5783 		numchan = 16;
5784 		break;
5785 	}
5786 	rdev->mc.vram_width = numchan * chansize;
5787 	/* Could aperture size report 0? */
5788 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5789 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5790 	/* size in MB on CIK */
5791 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5792 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5793 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5794 	si_vram_gtt_location(rdev, &rdev->mc);
5795 	radeon_update_bandwidth_info(rdev);
5796 
5797 	return 0;
5798 }
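
/*
 * The NOOFCHAN decode in cik_mc_init() is irregular (3-, 6-, 10- and
 * 12-channel configurations exist), which is why a switch is used
 * instead of a shift.  The same decode as a lookup table (hypothetical,
 * illustrative only):
 */
#if 0
static int cik_decode_numchan(u32 chmap)
{
	static const int numchan_map[] = { 1, 2, 4, 8, 3, 6, 10, 12, 16 };
	u32 idx = (chmap & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT;

	return idx < ARRAY_SIZE(numchan_map) ? numchan_map[idx] : 1;
}
#endif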
5799 
5800 /*
5801  * GART
5802  * VMID 0 is the physical GPU addresses as used by the kernel.
5803  * VMIDs 1-15 are used for userspace clients and are handled
5804  * by the radeon vm/hsa code.
5805  */
5806 /**
5807  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5808  *
5809  * @rdev: radeon_device pointer
5810  *
5811  * Flush the TLB for the VMID 0 page table (CIK).
5812  */
5813 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5814 {
5815 	/* flush hdp cache */
5816 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5817 
5818 	/* bits 0-15 are the VM contexts0-15 */
5819 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5820 }
5821 
5822 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5823 {
5824 	int i;
5825 	uint32_t sh_mem_bases, sh_mem_config;
5826 
5827 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5828 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5829 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5830 
5831 	mutex_lock(&rdev->srbm_mutex);
5832 	for (i = 8; i < 16; i++) {
5833 		cik_srbm_select(rdev, 0, 0, 0, i);
5834 		/* CP and shaders */
5835 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5836 		WREG32(SH_MEM_APE1_BASE, 1);
5837 		WREG32(SH_MEM_APE1_LIMIT, 0);
5838 		WREG32(SH_MEM_BASES, sh_mem_bases);
5839 	}
5840 	cik_srbm_select(rdev, 0, 0, 0, 0);
5841 	mutex_unlock(&rdev->srbm_mutex);
5842 }
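
/*
 * SH_MEM_BASES packs two 16-bit aperture bases into one register; the
 * 0x6000 | 0x6000 << 16 value above places both compute apertures at
 * the same base for VMIDs 8-15.  Assuming the low half is the private
 * base and the high half the shared base (as in the later amdgpu GFX7
 * code), a hypothetical packing macro (illustrative only) would be:
 */
#if 0
#define CIK_SH_MEM_BASES_PACK(private_base, shared_base) \
	(((private_base) & 0xffff) | (((shared_base) & 0xffff) << 16))
#endif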
5843 
5844 /**
5845  * cik_pcie_gart_enable - gart enable
5846  *
5847  * @rdev: radeon_device pointer
5848  *
5849  * This sets up the TLBs, programs the page tables for VMID0,
5850  * sets up the hw for VMIDs 1-15 which are allocated on
5851  * demand, and sets up the global locations for the LDS, GDS,
5852  * and GPUVM for FSA64 clients (CIK).
5853  * Returns 0 for success, errors for failure.
5854  */
5855 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5856 {
5857 	int r, i;
5858 
5859 	if (rdev->gart.robj == NULL) {
5860 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5861 		return -EINVAL;
5862 	}
5863 	r = radeon_gart_table_vram_pin(rdev);
5864 	if (r)
5865 		return r;
5866 	/* Setup TLB control */
5867 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5868 	       (0xA << 7) |
5869 	       ENABLE_L1_TLB |
5870 	       ENABLE_L1_FRAGMENT_PROCESSING |
5871 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5872 	       ENABLE_ADVANCED_DRIVER_MODEL |
5873 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5874 	/* Setup L2 cache */
5875 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5876 	       ENABLE_L2_FRAGMENT_PROCESSING |
5877 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5878 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5879 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5880 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5881 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5882 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5883 	       BANK_SELECT(4) |
5884 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5885 	/* setup context0 */
5886 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5887 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5888 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5889 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5890 			(u32)(rdev->dummy_page.addr >> 12));
5891 	WREG32(VM_CONTEXT0_CNTL2, 0);
5892 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5893 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5894 
5895 	WREG32(0x15D4, 0);
5896 	WREG32(0x15D8, 0);
5897 	WREG32(0x15DC, 0);
5898 
5899 	/* restore context1-15 */
5900 	/* set vm size, must be a multiple of 4 */
5901 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5902 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5903 	for (i = 1; i < 16; i++) {
5904 		if (i < 8)
5905 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5906 			       rdev->vm_manager.saved_table_addr[i]);
5907 		else
5908 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5909 			       rdev->vm_manager.saved_table_addr[i]);
5910 	}
5911 
5912 	/* enable context1-15 */
5913 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5914 	       (u32)(rdev->dummy_page.addr >> 12));
5915 	WREG32(VM_CONTEXT1_CNTL2, 4);
5916 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5917 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5918 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5919 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5920 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5921 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5922 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5923 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5924 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5925 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5926 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5927 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5928 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5929 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5930 
5931 	if (rdev->family == CHIP_KAVERI) {
5932 		u32 tmp = RREG32(CHUB_CONTROL);
5933 		tmp &= ~BYPASS_VM;
5934 		WREG32(CHUB_CONTROL, tmp);
5935 	}
5936 
5937 	/* XXX SH_MEM regs */
5938 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5939 	mutex_lock(&rdev->srbm_mutex);
5940 	for (i = 0; i < 16; i++) {
5941 		cik_srbm_select(rdev, 0, 0, 0, i);
5942 		/* CP and shaders */
5943 		WREG32(SH_MEM_CONFIG, 0);
5944 		WREG32(SH_MEM_APE1_BASE, 1);
5945 		WREG32(SH_MEM_APE1_LIMIT, 0);
5946 		WREG32(SH_MEM_BASES, 0);
5947 		/* SDMA GFX */
5948 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5949 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5950 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5951 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5952 		/* XXX SDMA RLC - todo */
5953 	}
5954 	cik_srbm_select(rdev, 0, 0, 0, 0);
5955 	mutex_unlock(&rdev->srbm_mutex);
5956 
5957 	cik_pcie_init_compute_vmid(rdev);
5958 
5959 	cik_pcie_gart_tlb_flush(rdev);
5960 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5961 		 (unsigned)(rdev->mc.gtt_size >> 20),
5962 		 (unsigned long long)rdev->gart.table_addr);
5963 	rdev->gart.ready = true;
5964 	return 0;
5965 }
5966 
5967 /**
5968  * cik_pcie_gart_disable - gart disable
5969  *
5970  * @rdev: radeon_device pointer
5971  *
5972  * This disables all VM page tables (CIK).
5973  */
5974 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5975 {
5976 	unsigned i;
5977 
5978 	for (i = 1; i < 16; ++i) {
5979 		uint32_t reg;
5980 		if (i < 8)
5981 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5982 		else
5983 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5984 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5985 	}
5986 
5987 	/* Disable all tables */
5988 	WREG32(VM_CONTEXT0_CNTL, 0);
5989 	WREG32(VM_CONTEXT1_CNTL, 0);
5990 	/* Setup TLB control */
5991 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5992 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5993 	/* Setup L2 cache */
5994 	WREG32(VM_L2_CNTL,
5995 	       ENABLE_L2_FRAGMENT_PROCESSING |
5996 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5997 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5998 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5999 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
6000 	WREG32(VM_L2_CNTL2, 0);
6001 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
6002 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
6003 	radeon_gart_table_vram_unpin(rdev);
6004 }
6005 
6006 /**
6007  * cik_pcie_gart_fini - vm fini callback
6008  *
6009  * @rdev: radeon_device pointer
6010  *
6011  * Tears down the driver GART/VM setup (CIK).
6012  */
6013 static void cik_pcie_gart_fini(struct radeon_device *rdev)
6014 {
6015 	cik_pcie_gart_disable(rdev);
6016 	radeon_gart_table_vram_free(rdev);
6017 	radeon_gart_fini(rdev);
6018 }
6019 
6020 /* vm parser */
6021 /**
6022  * cik_ib_parse - vm ib_parse callback
6023  *
6024  * @rdev: radeon_device pointer
6025  * @ib: indirect buffer pointer
6026  *
6027  * CIK uses hw IB checking so this is a nop (CIK).
6028  */
6029 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
6030 {
6031 	return 0;
6032 }
6033 
6034 /*
6035  * vm
6036  * VMID 0 is the physical GPU addresses as used by the kernel.
6037  * VMIDs 1-15 are used for userspace clients and are handled
6038  * by the radeon vm/hsa code.
6039  */
6040 /**
6041  * cik_vm_init - cik vm init callback
6042  *
6043  * @rdev: radeon_device pointer
6044  *
6045  * Inits cik specific vm parameters (number of VMs, base of vram for
6046  * VMIDs 1-15) (CIK).
6047  * Returns 0 for success.
6048  */
6049 int cik_vm_init(struct radeon_device *rdev)
6050 {
6051 	/*
6052 	 * number of VMs
6053 	 * VMID 0 is reserved for System
6054 	 * radeon graphics/compute will use VMIDs 1-7
6055 	 * amdkfd will use VMIDs 8-15
6056 	 */
6057 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6058 	/* base offset of vram pages */
6059 	if (rdev->flags & RADEON_IS_IGP) {
6060 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
6061 		tmp <<= 22;
6062 		rdev->vm_manager.vram_base_offset = tmp;
6063 	} else
6064 		rdev->vm_manager.vram_base_offset = 0;
6065 
6066 	return 0;
6067 }
6068 
6069 /**
6070  * cik_vm_fini - cik vm fini callback
6071  *
6072  * @rdev: radeon_device pointer
6073  *
6074  * Tear down any asic specific VM setup (CIK).
6075  */
6076 void cik_vm_fini(struct radeon_device *rdev)
6077 {
6078 }
6079 
6080 /**
6081  * cik_vm_decode_fault - print human readable fault info
6082  *
6083  * @rdev: radeon_device pointer
6084  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6085  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
6086  *
6087  * Print human readable fault information (CIK).
6088  */
6089 static void cik_vm_decode_fault(struct radeon_device *rdev,
6090 				u32 status, u32 addr, u32 mc_client)
6091 {
6092 	u32 mc_id;
6093 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6094 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6095 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6096 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6097 
6098 	if (rdev->family == CHIP_HAWAII)
6099 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6100 	else
6101 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6102 
6103 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6104 	       protections, vmid, addr,
6105 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6106 	       block, mc_client, mc_id);
6107 }
6108 
6109 /**
6110  * cik_vm_flush - cik vm flush using the CP
6111  *
6112  * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID of the page table to flush
 * @pd_addr: base address of the page directory
6113  *
6114  * Update the page table base and flush the VM TLB
6115  * using the CP (CIK).
6116  */
6117 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6118 		  unsigned vm_id, uint64_t pd_addr)
6119 {
6120 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6121 
6122 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6123 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6124 				 WRITE_DATA_DST_SEL(0)));
6125 	if (vm_id < 8) {
6126 		radeon_ring_write(ring,
6127 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6128 	} else {
6129 		radeon_ring_write(ring,
6130 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6131 	}
6132 	radeon_ring_write(ring, 0);
6133 	radeon_ring_write(ring, pd_addr >> 12);
6134 
6135 	/* update SH_MEM_* regs */
6136 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6137 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6138 				 WRITE_DATA_DST_SEL(0)));
6139 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6140 	radeon_ring_write(ring, 0);
6141 	radeon_ring_write(ring, VMID(vm_id));
6142 
6143 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6144 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6145 				 WRITE_DATA_DST_SEL(0)));
6146 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6147 	radeon_ring_write(ring, 0);
6148 
6149 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6150 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6151 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6152 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6153 
6154 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6155 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6156 				 WRITE_DATA_DST_SEL(0)));
6157 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6158 	radeon_ring_write(ring, 0);
6159 	radeon_ring_write(ring, VMID(0));
6160 
6161 	/* HDP flush */
6162 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6163 
6164 	/* bits 0-15 are the VM contexts0-15 */
6165 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6166 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6167 				 WRITE_DATA_DST_SEL(0)));
6168 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6169 	radeon_ring_write(ring, 0);
6170 	radeon_ring_write(ring, 1 << vm_id);
6171 
6172 	/* wait for the invalidate to complete */
6173 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6174 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6175 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6176 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6177 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6178 	radeon_ring_write(ring, 0);
6179 	radeon_ring_write(ring, 0); /* ref */
6180 	radeon_ring_write(ring, 0); /* mask */
6181 	radeon_ring_write(ring, 0x20); /* poll interval */
6182 
6183 	/* compute doesn't have PFP */
6184 	if (usepfp) {
6185 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6186 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6187 		radeon_ring_write(ring, 0x0);
6188 	}
6189 }
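
/*
 * Every register update emitted by cik_vm_flush() is the same
 * four-dword WRITE_DATA packet.  A hypothetical emit helper
 * (illustrative only) would collapse the repetition:
 */
#if 0
static void cik_ring_emit_wreg(struct radeon_ring *ring, bool usepfp,
			       u32 reg, u32 val)
{
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, WRITE_DATA_ENGINE_SEL(usepfp) |
				WRITE_DATA_DST_SEL(0));
	radeon_ring_write(ring, reg >> 2);	/* dword-aligned register */
	radeon_ring_write(ring, 0);		/* upper address bits */
	radeon_ring_write(ring, val);
}
#endif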
6190 
6191 /*
6192  * RLC
6193  * The RLC is a multi-purpose microengine that handles a
6194  * variety of functions, the most important of which is
6195  * the interrupt controller.
6196  */
6197 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6198 					  bool enable)
6199 {
6200 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6201 
6202 	if (enable)
6203 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6204 	else
6205 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6206 	WREG32(CP_INT_CNTL_RING0, tmp);
6207 }
6208 
6209 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6210 {
6211 	u32 tmp;
6212 
6213 	tmp = RREG32(RLC_LB_CNTL);
6214 	if (enable)
6215 		tmp |= LOAD_BALANCE_ENABLE;
6216 	else
6217 		tmp &= ~LOAD_BALANCE_ENABLE;
6218 	WREG32(RLC_LB_CNTL, tmp);
6219 }
6220 
6221 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6222 {
6223 	u32 i, j, k;
6224 	u32 mask;
6225 
6226 	mutex_lock(&rdev->grbm_idx_mutex);
6227 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6228 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6229 			cik_select_se_sh(rdev, i, j);
6230 			for (k = 0; k < rdev->usec_timeout; k++) {
6231 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6232 					break;
6233 				udelay(1);
6234 			}
6235 		}
6236 	}
6237 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6238 	mutex_unlock(&rdev->grbm_idx_mutex);
6239 
6240 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6241 	for (k = 0; k < rdev->usec_timeout; k++) {
6242 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6243 			break;
6244 		udelay(1);
6245 	}
6246 }
6247 
6248 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6249 {
6250 	u32 tmp;
6251 
6252 	tmp = RREG32(RLC_CNTL);
6253 	if (tmp != rlc)
6254 		WREG32(RLC_CNTL, rlc);
6255 }
6256 
6257 static u32 cik_halt_rlc(struct radeon_device *rdev)
6258 {
6259 	u32 data, orig;
6260 
6261 	orig = data = RREG32(RLC_CNTL);
6262 
6263 	if (data & RLC_ENABLE) {
6264 		u32 i;
6265 
6266 		data &= ~RLC_ENABLE;
6267 		WREG32(RLC_CNTL, data);
6268 
6269 		for (i = 0; i < rdev->usec_timeout; i++) {
6270 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6271 				break;
6272 			udelay(1);
6273 		}
6274 
6275 		cik_wait_for_rlc_serdes(rdev);
6276 	}
6277 
6278 	return orig;
6279 }
6280 
6281 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6282 {
6283 	u32 tmp, i, mask;
6284 
6285 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6286 	WREG32(RLC_GPR_REG2, tmp);
6287 
6288 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6289 	for (i = 0; i < rdev->usec_timeout; i++) {
6290 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6291 			break;
6292 		udelay(1);
6293 	}
6294 
6295 	for (i = 0; i < rdev->usec_timeout; i++) {
6296 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6297 			break;
6298 		udelay(1);
6299 	}
6300 }
6301 
6302 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6303 {
6304 	u32 tmp;
6305 
6306 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6307 	WREG32(RLC_GPR_REG2, tmp);
6308 }
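
/*
 * Enter/exit are meant to bracket any sequence that touches RLC-owned
 * state while clock gating may be active.  Usage sketch (illustrative
 * only):
 *
 *	cik_enter_rlc_safe_mode(rdev);
 *	... reprogram RLC-sensitive registers ...
 *	cik_exit_rlc_safe_mode(rdev);
 */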
6309 
6310 /**
6311  * cik_rlc_stop - stop the RLC ME
6312  *
6313  * @rdev: radeon_device pointer
6314  *
6315  * Halt the RLC ME (MicroEngine) (CIK).
6316  */
6317 static void cik_rlc_stop(struct radeon_device *rdev)
6318 {
6319 	WREG32(RLC_CNTL, 0);
6320 
6321 	cik_enable_gui_idle_interrupt(rdev, false);
6322 
6323 	cik_wait_for_rlc_serdes(rdev);
6324 }
6325 
6326 /**
6327  * cik_rlc_start - start the RLC ME
6328  *
6329  * @rdev: radeon_device pointer
6330  *
6331  * Unhalt the RLC ME (MicroEngine) (CIK).
6332  */
6333 static void cik_rlc_start(struct radeon_device *rdev)
6334 {
6335 	WREG32(RLC_CNTL, RLC_ENABLE);
6336 
6337 	cik_enable_gui_idle_interrupt(rdev, true);
6338 
6339 	udelay(50);
6340 }
6341 
6342 /**
6343  * cik_rlc_resume - setup the RLC hw
6344  *
6345  * @rdev: radeon_device pointer
6346  *
6347  * Initialize the RLC registers, load the ucode,
6348  * and start the RLC (CIK).
6349  * Returns 0 for success, -EINVAL if the ucode is not available.
6350  */
6351 static int cik_rlc_resume(struct radeon_device *rdev)
6352 {
6353 	u32 i, size, tmp;
6354 
6355 	if (!rdev->rlc_fw)
6356 		return -EINVAL;
6357 
6358 	cik_rlc_stop(rdev);
6359 
6360 	/* disable CG */
6361 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6362 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6363 
6364 	si_rlc_reset(rdev);
6365 
6366 	cik_init_pg(rdev);
6367 
6368 	cik_init_cg(rdev);
6369 
6370 	WREG32(RLC_LB_CNTR_INIT, 0);
6371 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6372 
6373 	mutex_lock(&rdev->grbm_idx_mutex);
6374 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6375 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6376 	WREG32(RLC_LB_PARAMS, 0x00600408);
6377 	WREG32(RLC_LB_CNTL, 0x80000004);
6378 	mutex_unlock(&rdev->grbm_idx_mutex);
6379 
6380 	WREG32(RLC_MC_CNTL, 0);
6381 	WREG32(RLC_UCODE_CNTL, 0);
6382 
6383 	if (rdev->new_fw) {
6384 		const struct rlc_firmware_header_v1_0 *hdr =
6385 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6386 		const __le32 *fw_data = (const __le32 *)
6387 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6388 
6389 		radeon_ucode_print_rlc_hdr(&hdr->header);
6390 
6391 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6392 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6393 		for (i = 0; i < size; i++)
6394 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6395 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6396 	} else {
6397 		const __be32 *fw_data;
6398 
6399 		switch (rdev->family) {
6400 		case CHIP_BONAIRE:
6401 		case CHIP_HAWAII:
6402 		default:
6403 			size = BONAIRE_RLC_UCODE_SIZE;
6404 			break;
6405 		case CHIP_KAVERI:
6406 			size = KV_RLC_UCODE_SIZE;
6407 			break;
6408 		case CHIP_KABINI:
6409 			size = KB_RLC_UCODE_SIZE;
6410 			break;
6411 		case CHIP_MULLINS:
6412 			size = ML_RLC_UCODE_SIZE;
6413 			break;
6414 		}
6415 
6416 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6417 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6418 		for (i = 0; i < size; i++)
6419 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6420 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6421 	}
6422 
6423 	/* XXX - find out what chips support lbpw */
6424 	cik_enable_lbpw(rdev, false);
6425 
6426 	if (rdev->family == CHIP_BONAIRE)
6427 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6428 
6429 	cik_rlc_start(rdev);
6430 
6431 	return 0;
6432 }
6433 
6434 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6435 {
6436 	u32 data, orig, tmp, tmp2;
6437 
6438 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6439 
6440 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6441 		cik_enable_gui_idle_interrupt(rdev, true);
6442 
6443 		tmp = cik_halt_rlc(rdev);
6444 
6445 		mutex_lock(&rdev->grbm_idx_mutex);
6446 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6447 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6448 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6449 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6450 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6451 		mutex_unlock(&rdev->grbm_idx_mutex);
6452 
6453 		cik_update_rlc(rdev, tmp);
6454 
6455 		data |= CGCG_EN | CGLS_EN;
6456 	} else {
6457 		cik_enable_gui_idle_interrupt(rdev, false);
6458 
6459 		RREG32(CB_CGTT_SCLK_CTRL);
6460 		RREG32(CB_CGTT_SCLK_CTRL);
6461 		RREG32(CB_CGTT_SCLK_CTRL);
6462 		RREG32(CB_CGTT_SCLK_CTRL);
6463 
6464 		data &= ~(CGCG_EN | CGLS_EN);
6465 	}
6466 
6467 	if (orig != data)
6468 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6469 
6470 }
6471 
6472 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6473 {
6474 	u32 data, orig, tmp = 0;
6475 
6476 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6477 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6478 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6479 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6480 				data |= CP_MEM_LS_EN;
6481 				if (orig != data)
6482 					WREG32(CP_MEM_SLP_CNTL, data);
6483 			}
6484 		}
6485 
6486 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6487 		data |= 0x00000001;
6488 		data &= 0xfffffffd;
6489 		if (orig != data)
6490 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6491 
6492 		tmp = cik_halt_rlc(rdev);
6493 
6494 		mutex_lock(&rdev->grbm_idx_mutex);
6495 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6496 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6497 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6498 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6499 		WREG32(RLC_SERDES_WR_CTRL, data);
6500 		mutex_unlock(&rdev->grbm_idx_mutex);
6501 
6502 		cik_update_rlc(rdev, tmp);
6503 
6504 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6505 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6506 			data &= ~SM_MODE_MASK;
6507 			data |= SM_MODE(0x2);
6508 			data |= SM_MODE_ENABLE;
6509 			data &= ~CGTS_OVERRIDE;
6510 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6511 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6512 				data &= ~CGTS_LS_OVERRIDE;
6513 			data &= ~ON_MONITOR_ADD_MASK;
6514 			data |= ON_MONITOR_ADD_EN;
6515 			data |= ON_MONITOR_ADD(0x96);
6516 			if (orig != data)
6517 				WREG32(CGTS_SM_CTRL_REG, data);
6518 		}
6519 	} else {
6520 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6521 		data |= 0x00000003;
6522 		if (orig != data)
6523 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6524 
6525 		data = RREG32(RLC_MEM_SLP_CNTL);
6526 		if (data & RLC_MEM_LS_EN) {
6527 			data &= ~RLC_MEM_LS_EN;
6528 			WREG32(RLC_MEM_SLP_CNTL, data);
6529 		}
6530 
6531 		data = RREG32(CP_MEM_SLP_CNTL);
6532 		if (data & CP_MEM_LS_EN) {
6533 			data &= ~CP_MEM_LS_EN;
6534 			WREG32(CP_MEM_SLP_CNTL, data);
6535 		}
6536 
6537 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6538 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6539 		if (orig != data)
6540 			WREG32(CGTS_SM_CTRL_REG, data);
6541 
6542 		tmp = cik_halt_rlc(rdev);
6543 
6544 		mutex_lock(&rdev->grbm_idx_mutex);
6545 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6546 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6547 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6548 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6549 		WREG32(RLC_SERDES_WR_CTRL, data);
6550 		mutex_unlock(&rdev->grbm_idx_mutex);
6551 
6552 		cik_update_rlc(rdev, tmp);
6553 	}
6554 }
6555 
6556 static const u32 mc_cg_registers[] =
6557 {
6558 	MC_HUB_MISC_HUB_CG,
6559 	MC_HUB_MISC_SIP_CG,
6560 	MC_HUB_MISC_VM_CG,
6561 	MC_XPB_CLK_GAT,
6562 	ATC_MISC_CG,
6563 	MC_CITF_MISC_WR_CG,
6564 	MC_CITF_MISC_RD_CG,
6565 	MC_CITF_MISC_VM_CG,
6566 	VM_L2_CG,
6567 };
6568 
6569 static void cik_enable_mc_ls(struct radeon_device *rdev,
6570 			     bool enable)
6571 {
6572 	int i;
6573 	u32 orig, data;
6574 
6575 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6576 		orig = data = RREG32(mc_cg_registers[i]);
6577 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6578 			data |= MC_LS_ENABLE;
6579 		else
6580 			data &= ~MC_LS_ENABLE;
6581 		if (data != orig)
6582 			WREG32(mc_cg_registers[i], data);
6583 	}
6584 }
6585 
6586 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6587 			       bool enable)
6588 {
6589 	int i;
6590 	u32 orig, data;
6591 
6592 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6593 		orig = data = RREG32(mc_cg_registers[i]);
6594 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6595 			data |= MC_CG_ENABLE;
6596 		else
6597 			data &= ~MC_CG_ENABLE;
6598 		if (data != orig)
6599 			WREG32(mc_cg_registers[i], data);
6600 	}
6601 }
6602 
6603 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6604 				 bool enable)
6605 {
6606 	u32 orig, data;
6607 
6608 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6609 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6610 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6611 	} else {
6612 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6613 		data |= 0xff000000;
6614 		if (data != orig)
6615 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6616 
6617 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6618 		data |= 0xff000000;
6619 		if (data != orig)
6620 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6621 	}
6622 }
6623 
6624 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6625 				 bool enable)
6626 {
6627 	u32 orig, data;
6628 
6629 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6630 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6631 		data |= 0x100;
6632 		if (orig != data)
6633 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6634 
6635 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6636 		data |= 0x100;
6637 		if (orig != data)
6638 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6639 	} else {
6640 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6641 		data &= ~0x100;
6642 		if (orig != data)
6643 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6644 
6645 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6646 		data &= ~0x100;
6647 		if (orig != data)
6648 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6649 	}
6650 }
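
/*
 * The clock- and light-sleep toggles in this block all share one
 * read-modify-write shape: read, flip bits, write back only on change
 * to avoid redundant register traffic.  A hypothetical helper
 * (illustrative only):
 */
#if 0
static void cik_rmw_if_changed(struct radeon_device *rdev,
			       u32 reg, u32 set, u32 clear)
{
	u32 orig, data;

	orig = data = RREG32(reg);
	data |= set;
	data &= ~clear;
	if (data != orig)
		WREG32(reg, data);
}
#endif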
6651 
6652 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6653 				bool enable)
6654 {
6655 	u32 orig, data;
6656 
6657 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6658 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6659 		data = 0xfff;	/* the value read above is discarded; set all mem gating bits */
6660 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6661 
6662 		orig = data = RREG32(UVD_CGC_CTRL);
6663 		data |= DCM;
6664 		if (orig != data)
6665 			WREG32(UVD_CGC_CTRL, data);
6666 	} else {
6667 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6668 		data &= ~0xfff;
6669 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6670 
6671 		orig = data = RREG32(UVD_CGC_CTRL);
6672 		data &= ~DCM;
6673 		if (orig != data)
6674 			WREG32(UVD_CGC_CTRL, data);
6675 	}
6676 }
6677 
6678 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6679 			       bool enable)
6680 {
6681 	u32 orig, data;
6682 
6683 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6684 
6685 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6686 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6687 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6688 	else
6689 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6690 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6691 
6692 	if (orig != data)
6693 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6694 }
6695 
6696 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6697 				bool enable)
6698 {
6699 	u32 orig, data;
6700 
6701 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6702 
6703 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6704 		data &= ~CLOCK_GATING_DIS;
6705 	else
6706 		data |= CLOCK_GATING_DIS;
6707 
6708 	if (orig != data)
6709 		WREG32(HDP_HOST_PATH_CNTL, data);
6710 }
6711 
6712 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6713 			      bool enable)
6714 {
6715 	u32 orig, data;
6716 
6717 	orig = data = RREG32(HDP_MEM_POWER_LS);
6718 
6719 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6720 		data |= HDP_LS_ENABLE;
6721 	else
6722 		data &= ~HDP_LS_ENABLE;
6723 
6724 	if (orig != data)
6725 		WREG32(HDP_MEM_POWER_LS, data);
6726 }
6727 
6728 void cik_update_cg(struct radeon_device *rdev,
6729 		   u32 block, bool enable)
6730 {
6731 
6732 	if (block & RADEON_CG_BLOCK_GFX) {
6733 		cik_enable_gui_idle_interrupt(rdev, false);
6734 		/* order matters! */
6735 		if (enable) {
6736 			cik_enable_mgcg(rdev, true);
6737 			cik_enable_cgcg(rdev, true);
6738 		} else {
6739 			cik_enable_cgcg(rdev, false);
6740 			cik_enable_mgcg(rdev, false);
6741 		}
6742 		cik_enable_gui_idle_interrupt(rdev, true);
6743 	}
6744 
6745 	if (block & RADEON_CG_BLOCK_MC) {
6746 		if (!(rdev->flags & RADEON_IS_IGP)) {
6747 			cik_enable_mc_mgcg(rdev, enable);
6748 			cik_enable_mc_ls(rdev, enable);
6749 		}
6750 	}
6751 
6752 	if (block & RADEON_CG_BLOCK_SDMA) {
6753 		cik_enable_sdma_mgcg(rdev, enable);
6754 		cik_enable_sdma_mgls(rdev, enable);
6755 	}
6756 
6757 	if (block & RADEON_CG_BLOCK_BIF) {
6758 		cik_enable_bif_mgls(rdev, enable);
6759 	}
6760 
6761 	if (block & RADEON_CG_BLOCK_UVD) {
6762 		if (rdev->has_uvd)
6763 			cik_enable_uvd_mgcg(rdev, enable);
6764 	}
6765 
6766 	if (block & RADEON_CG_BLOCK_HDP) {
6767 		cik_enable_hdp_mgcg(rdev, enable);
6768 		cik_enable_hdp_ls(rdev, enable);
6769 	}
6770 
6771 	if (block & RADEON_CG_BLOCK_VCE) {
6772 		vce_v2_0_enable_mgcg(rdev, enable);
6773 	}
6774 }
6775 
6776 static void cik_init_cg(struct radeon_device *rdev)
6777 {
6779 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6780 
6781 	if (rdev->has_uvd)
6782 		si_init_uvd_internal_cg(rdev);
6783 
6784 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6785 			     RADEON_CG_BLOCK_SDMA |
6786 			     RADEON_CG_BLOCK_BIF |
6787 			     RADEON_CG_BLOCK_UVD |
6788 			     RADEON_CG_BLOCK_HDP), true);
6789 }
6790 
6791 static void cik_fini_cg(struct radeon_device *rdev)
6792 {
6793 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6794 			     RADEON_CG_BLOCK_SDMA |
6795 			     RADEON_CG_BLOCK_BIF |
6796 			     RADEON_CG_BLOCK_UVD |
6797 			     RADEON_CG_BLOCK_HDP), false);
6798 
6799 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6800 }
6801 
6802 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6803 					  bool enable)
6804 {
6805 	u32 data, orig;
6806 
6807 	orig = data = RREG32(RLC_PG_CNTL);
6808 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6809 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6810 	else
6811 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6812 	if (orig != data)
6813 		WREG32(RLC_PG_CNTL, data);
6814 }
6815 
6816 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6817 					  bool enable)
6818 {
6819 	u32 data, orig;
6820 
6821 	orig = data = RREG32(RLC_PG_CNTL);
6822 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6823 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6824 	else
6825 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6826 	if (orig != data)
6827 		WREG32(RLC_PG_CNTL, data);
6828 }
6829 
6830 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6831 {
6832 	u32 data, orig;
6833 
6834 	orig = data = RREG32(RLC_PG_CNTL);
6835 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6836 		data &= ~DISABLE_CP_PG;
6837 	else
6838 		data |= DISABLE_CP_PG;
6839 	if (orig != data)
6840 		WREG32(RLC_PG_CNTL, data);
6841 }
6842 
6843 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6844 {
6845 	u32 data, orig;
6846 
6847 	orig = data = RREG32(RLC_PG_CNTL);
6848 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6849 		data &= ~DISABLE_GDS_PG;
6850 	else
6851 		data |= DISABLE_GDS_PG;
6852 	if (orig != data)
6853 		WREG32(RLC_PG_CNTL, data);
6854 }
6855 
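/* jump table size and offsets (in dwords) within the legacy, header-less
 * firmware images; newer images carry these values in their
 * gfx_firmware_header_v1_0 header instead (see cik_init_cp_pg_table()).
 */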
6856 #define CP_ME_TABLE_SIZE    96
6857 #define CP_ME_TABLE_OFFSET  2048
6858 #define CP_MEC_TABLE_OFFSET 4096
6859 
6860 void cik_init_cp_pg_table(struct radeon_device *rdev)
6861 {
6862 	volatile u32 *dst_ptr;
6863 	int me, i, max_me = 4;
6864 	u32 bo_offset = 0;
6865 	u32 table_offset, table_size;
6866 
6867 	if (rdev->family == CHIP_KAVERI)
6868 		max_me = 5;
6869 
6870 	if (rdev->rlc.cp_table_ptr == NULL)
6871 		return;
6872 
6873 	/* write the cp table buffer */
6874 	dst_ptr = rdev->rlc.cp_table_ptr;
6875 	for (me = 0; me < max_me; me++) {
6876 		if (rdev->new_fw) {
6877 			const __le32 *fw_data;
6878 			const struct gfx_firmware_header_v1_0 *hdr;
6879 
6880 			if (me == 0) {
6881 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6882 				fw_data = (const __le32 *)
6883 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6884 				table_offset = le32_to_cpu(hdr->jt_offset);
6885 				table_size = le32_to_cpu(hdr->jt_size);
6886 			} else if (me == 1) {
6887 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6888 				fw_data = (const __le32 *)
6889 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6890 				table_offset = le32_to_cpu(hdr->jt_offset);
6891 				table_size = le32_to_cpu(hdr->jt_size);
6892 			} else if (me == 2) {
6893 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6894 				fw_data = (const __le32 *)
6895 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6896 				table_offset = le32_to_cpu(hdr->jt_offset);
6897 				table_size = le32_to_cpu(hdr->jt_size);
6898 			} else if (me == 3) {
6899 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6900 				fw_data = (const __le32 *)
6901 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6902 				table_offset = le32_to_cpu(hdr->jt_offset);
6903 				table_size = le32_to_cpu(hdr->jt_size);
6904 			} else {
6905 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6906 				fw_data = (const __le32 *)
6907 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6908 				table_offset = le32_to_cpu(hdr->jt_offset);
6909 				table_size = le32_to_cpu(hdr->jt_size);
6910 			}
6911 
6912 			for (i = 0; i < table_size; i++) {
6913 				dst_ptr[bo_offset + i] =
6914 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6915 			}
6916 			bo_offset += table_size;
6917 		} else {
6918 			const __be32 *fw_data;
6919 			table_size = CP_ME_TABLE_SIZE;
6920 
6921 			if (me == 0) {
6922 				fw_data = (const __be32 *)rdev->ce_fw->data;
6923 				table_offset = CP_ME_TABLE_OFFSET;
6924 			} else if (me == 1) {
6925 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6926 				table_offset = CP_ME_TABLE_OFFSET;
6927 			} else if (me == 2) {
6928 				fw_data = (const __be32 *)rdev->me_fw->data;
6929 				table_offset = CP_ME_TABLE_OFFSET;
6930 			} else {
6931 				fw_data = (const __be32 *)rdev->mec_fw->data;
6932 				table_offset = CP_MEC_TABLE_OFFSET;
6933 			}
6934 
6935 			for (i = 0; i < table_size; i++) {
6936 				dst_ptr[bo_offset + i] =
6937 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6938 			}
6939 			bo_offset += table_size;
6940 		}
6941 	}
6942 }
6943 
6944 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6945 				bool enable)
6946 {
6947 	u32 data, orig;
6948 
6949 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6950 		orig = data = RREG32(RLC_PG_CNTL);
6951 		data |= GFX_PG_ENABLE;
6952 		if (orig != data)
6953 			WREG32(RLC_PG_CNTL, data);
6954 
6955 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6956 		data |= AUTO_PG_EN;
6957 		if (orig != data)
6958 			WREG32(RLC_AUTO_PG_CTRL, data);
6959 	} else {
6960 		orig = data = RREG32(RLC_PG_CNTL);
6961 		data &= ~GFX_PG_ENABLE;
6962 		if (orig != data)
6963 			WREG32(RLC_PG_CNTL, data);
6964 
6965 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6966 		data &= ~AUTO_PG_EN;
6967 		if (orig != data)
6968 			WREG32(RLC_AUTO_PG_CTRL, data);
6969 
6970 		data = RREG32(DB_RENDER_CONTROL);
6971 	}
6972 }
6973 
6974 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6975 {
6976 	u32 mask = 0, tmp, tmp1;
6977 	int i;
6978 
6979 	mutex_lock(&rdev->grbm_idx_mutex);
6980 	cik_select_se_sh(rdev, se, sh);
6981 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6982 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6983 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6984 	mutex_unlock(&rdev->grbm_idx_mutex);
6985 
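	/* the inactive-CU bits live in the upper 16 bits of both registers;
	 * fold the user config into the hw config and shift the combined
	 * mask down
	 */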
6986 	tmp &= 0xffff0000;
6987 
6988 	tmp |= tmp1;
6989 	tmp >>= 16;
6990 
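	/* build a mask with one bit per possible CU, i.e. (1 << max_cu_per_sh) - 1 */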
6991 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6992 		mask <<= 1;
6993 		mask |= 1;
6994 	}
6995 
6996 	return (~tmp) & mask;
6997 }
6998 
6999 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
7000 {
7001 	u32 i, j, k, active_cu_number = 0;
7002 	u32 mask, counter, cu_bitmap;
7003 	u32 tmp = 0;
7004 
7005 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
7006 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
7007 			mask = 1;
7008 			cu_bitmap = 0;
7009 			counter = 0;
7010 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
7011 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
7012 					if (counter < 2)
7013 						cu_bitmap |= mask;
7014 					counter++;
7015 				}
7016 				mask <<= 1;
7017 			}
7018 
7019 			active_cu_number += counter;
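			/* pack this SH's always-on bitmap: 16 bits per SE, 8 bits per SH */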
7020 			tmp |= (cu_bitmap << (i * 16 + j * 8));
7021 		}
7022 	}
7023 
7024 	WREG32(RLC_PG_AO_CU_MASK, tmp);
7025 
7026 	tmp = RREG32(RLC_MAX_PG_CU);
7027 	tmp &= ~MAX_PU_CU_MASK;
7028 	tmp |= MAX_PU_CU(active_cu_number);
7029 	WREG32(RLC_MAX_PG_CU, tmp);
7030 }
7031 
7032 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7033 				       bool enable)
7034 {
7035 	u32 data, orig;
7036 
7037 	orig = data = RREG32(RLC_PG_CNTL);
7038 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7039 		data |= STATIC_PER_CU_PG_ENABLE;
7040 	else
7041 		data &= ~STATIC_PER_CU_PG_ENABLE;
7042 	if (orig != data)
7043 		WREG32(RLC_PG_CNTL, data);
7044 }
7045 
7046 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7047 					bool enable)
7048 {
7049 	u32 data, orig;
7050 
7051 	orig = data = RREG32(RLC_PG_CNTL);
7052 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7053 		data |= DYN_PER_CU_PG_ENABLE;
7054 	else
7055 		data &= ~DYN_PER_CU_PG_ENABLE;
7056 	if (orig != data)
7057 		WREG32(RLC_PG_CNTL, data);
7058 }
7059 
7060 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7061 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7062 
7063 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
7064 {
7065 	u32 data, orig;
7066 	u32 i;
7067 
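	/* program the clear-state descriptor (address hi/lo plus size) into
	 * the RLC scratch space, or zero it out if there is no cs data
	 */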
7068 	if (rdev->rlc.cs_data) {
7069 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7070 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
7071 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
7072 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
7073 	} else {
7074 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7075 		for (i = 0; i < 3; i++)
7076 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
7077 	}
7078 	if (rdev->rlc.reg_list) {
7079 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
7080 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
7081 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
7082 	}
7083 
7084 	orig = data = RREG32(RLC_PG_CNTL);
7085 	data |= GFX_PG_SRC;
7086 	if (orig != data)
7087 		WREG32(RLC_PG_CNTL, data);
7088 
7089 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
7090 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
7091 
7092 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
7093 	data &= ~IDLE_POLL_COUNT_MASK;
7094 	data |= IDLE_POLL_COUNT(0x60);
7095 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
7096 
7097 	data = 0x10101010;
7098 	WREG32(RLC_PG_DELAY, data);
7099 
7100 	data = RREG32(RLC_PG_DELAY_2);
7101 	data &= ~0xff;
7102 	data |= 0x3;
7103 	WREG32(RLC_PG_DELAY_2, data);
7104 
7105 	data = RREG32(RLC_AUTO_PG_CTRL);
7106 	data &= ~GRBM_REG_SGIT_MASK;
7107 	data |= GRBM_REG_SGIT(0x700);
7108 	WREG32(RLC_AUTO_PG_CTRL, data);
7110 }
7111 
7112 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7113 {
7114 	cik_enable_gfx_cgpg(rdev, enable);
7115 	cik_enable_gfx_static_mgpg(rdev, enable);
7116 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
7117 }
7118 
7119 u32 cik_get_csb_size(struct radeon_device *rdev)
7120 {
7121 	u32 count = 0;
7122 	const struct cs_section_def *sect = NULL;
7123 	const struct cs_extent_def *ext = NULL;
7124 
7125 	if (rdev->rlc.cs_data == NULL)
7126 		return 0;
7127 
7128 	/* begin clear state */
7129 	count += 2;
7130 	/* context control state */
7131 	count += 3;
7132 
7133 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7134 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7135 			if (sect->id == SECT_CONTEXT)
7136 				count += 2 + ext->reg_count;
7137 			else
7138 				return 0;
7139 		}
7140 	}
7141 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7142 	count += 4;
7143 	/* end clear state */
7144 	count += 2;
7145 	/* clear state */
7146 	count += 2;
7147 
7148 	return count;
7149 }
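
/*
 * cik_get_csb_size() and cik_get_csb_buffer() are meant to be used as a
 * pair: size the clear-state buffer first, then fill it.  A minimal
 * sketch of the expected calling sequence (the allocation helper shown
 * here is hypothetical; the real allocation lives in the shared RLC
 * buffer setup code):
 *
 *	u32 dws = cik_get_csb_size(rdev);
 *	volatile u32 *buf = alloc_csb(rdev, dws * sizeof(u32)); // hypothetical
 *	if (buf)
 *		cik_get_csb_buffer(rdev, buf);
 */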
7150 
7151 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7152 {
7153 	u32 count = 0, i;
7154 	const struct cs_section_def *sect = NULL;
7155 	const struct cs_extent_def *ext = NULL;
7156 
7157 	if (rdev->rlc.cs_data == NULL)
7158 		return;
7159 	if (buffer == NULL)
7160 		return;
7161 
7162 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7163 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7164 
7165 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7166 	buffer[count++] = cpu_to_le32(0x80000000);
7167 	buffer[count++] = cpu_to_le32(0x80000000);
7168 
7169 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7170 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7171 			if (sect->id == SECT_CONTEXT) {
7172 				buffer[count++] =
7173 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7174 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7175 				for (i = 0; i < ext->reg_count; i++)
7176 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7177 			} else {
7178 				return;
7179 			}
7180 		}
7181 	}
7182 
7183 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7184 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7185 	switch (rdev->family) {
7186 	case CHIP_BONAIRE:
7187 		buffer[count++] = cpu_to_le32(0x16000012);
7188 		buffer[count++] = cpu_to_le32(0x00000000);
7189 		break;
7190 	case CHIP_KAVERI:
7191 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7192 		buffer[count++] = cpu_to_le32(0x00000000);
7193 		break;
7194 	case CHIP_KABINI:
7195 	case CHIP_MULLINS:
7196 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7197 		buffer[count++] = cpu_to_le32(0x00000000);
7198 		break;
7199 	case CHIP_HAWAII:
7200 		buffer[count++] = cpu_to_le32(0x3a00161a);
7201 		buffer[count++] = cpu_to_le32(0x0000002e);
7202 		break;
7203 	default:
7204 		buffer[count++] = cpu_to_le32(0x00000000);
7205 		buffer[count++] = cpu_to_le32(0x00000000);
7206 		break;
7207 	}
7208 
7209 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7210 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7211 
7212 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7213 	buffer[count++] = cpu_to_le32(0);
7214 }
7215 
7216 static void cik_init_pg(struct radeon_device *rdev)
7217 {
7218 	if (rdev->pg_flags) {
7219 		cik_enable_sck_slowdown_on_pu(rdev, true);
7220 		cik_enable_sck_slowdown_on_pd(rdev, true);
7221 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7222 			cik_init_gfx_cgpg(rdev);
7223 			cik_enable_cp_pg(rdev, true);
7224 			cik_enable_gds_pg(rdev, true);
7225 		}
7226 		cik_init_ao_cu_mask(rdev);
7227 		cik_update_gfx_pg(rdev, true);
7228 	}
7229 }
7230 
7231 static void cik_fini_pg(struct radeon_device *rdev)
7232 {
7233 	if (rdev->pg_flags) {
7234 		cik_update_gfx_pg(rdev, false);
7235 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7236 			cik_enable_cp_pg(rdev, false);
7237 			cik_enable_gds_pg(rdev, false);
7238 		}
7239 	}
7240 }
7241 
7242 /*
7243  * Interrupts
7244  * Starting with r6xx, interrupts are handled via a ring buffer.
7245  * Ring buffers are areas of GPU accessible memory that the GPU
7246  * writes interrupt vectors into and the host reads vectors out of.
7247  * There is a rptr (read pointer) that determines where the
7248  * host is currently reading, and a wptr (write pointer)
7249  * which determines where the GPU has written.  When the
7250  * pointers are equal, the ring is idle.  When the GPU
7251  * writes vectors to the ring buffer, it increments the
7252  * wptr.  When there is an interrupt, the host then starts
7253  * fetching vectors and processing them until the pointers are
7254  * equal again, at which point it updates the rptr.
7255  */
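
/*
 * A minimal sketch of the consumer side of such a ring, assuming
 * hypothetical fetch_wptr() and handle_vector() helpers (the real
 * logic, including overflow handling, is in cik_get_ih_wptr() and
 * cik_irq_process() below):
 *
 *	u32 rptr = rdev->ih.rptr;
 *	u32 wptr = fetch_wptr(rdev);             // where the GPU has written
 *
 *	while (rptr != wptr) {                   // ring is not idle
 *		handle_vector(&rdev->ih.ring[rptr / 4]);
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;  // 16-byte vectors
 *	}
 *	rdev->ih.rptr = rptr;
 *	WREG32(IH_RB_RPTR, rptr);                // tell the GPU we caught up
 */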
7256 
7257 /**
7258  * cik_enable_interrupts - Enable the interrupt ring buffer
7259  *
7260  * @rdev: radeon_device pointer
7261  *
7262  * Enable the interrupt ring buffer (CIK).
7263  */
7264 static void cik_enable_interrupts(struct radeon_device *rdev)
7265 {
7266 	u32 ih_cntl = RREG32(IH_CNTL);
7267 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7268 
7269 	ih_cntl |= ENABLE_INTR;
7270 	ih_rb_cntl |= IH_RB_ENABLE;
7271 	WREG32(IH_CNTL, ih_cntl);
7272 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7273 	rdev->ih.enabled = true;
7274 }
7275 
7276 /**
7277  * cik_disable_interrupts - Disable the interrupt ring buffer
7278  *
7279  * @rdev: radeon_device pointer
7280  *
7281  * Disable the interrupt ring buffer (CIK).
7282  */
7283 static void cik_disable_interrupts(struct radeon_device *rdev)
7284 {
7285 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7286 	u32 ih_cntl = RREG32(IH_CNTL);
7287 
7288 	ih_rb_cntl &= ~IH_RB_ENABLE;
7289 	ih_cntl &= ~ENABLE_INTR;
7290 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7291 	WREG32(IH_CNTL, ih_cntl);
7292 	/* set rptr, wptr to 0 */
7293 	WREG32(IH_RB_RPTR, 0);
7294 	WREG32(IH_RB_WPTR, 0);
7295 	rdev->ih.enabled = false;
7296 	rdev->ih.rptr = 0;
7297 }
7298 
7299 /**
7300  * cik_disable_interrupt_state - Disable all interrupt sources
7301  *
7302  * @rdev: radeon_device pointer
7303  *
7304  * Clear all interrupt enable bits used by the driver (CIK).
7305  */
7306 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7307 {
7308 	u32 tmp;
7309 
7310 	/* gfx ring */
7311 	tmp = RREG32(CP_INT_CNTL_RING0) &
7312 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7313 	WREG32(CP_INT_CNTL_RING0, tmp);
7314 	/* sdma */
7315 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7316 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7317 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7318 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7319 	/* compute queues */
7320 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7321 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7322 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7323 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7324 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7325 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7326 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7327 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7328 	/* grbm */
7329 	WREG32(GRBM_INT_CNTL, 0);
7330 	/* SRBM */
7331 	WREG32(SRBM_INT_CNTL, 0);
7332 	/* vline/vblank, etc. */
7333 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7334 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7335 	if (rdev->num_crtc >= 4) {
7336 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7337 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7338 	}
7339 	if (rdev->num_crtc >= 6) {
7340 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7341 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7342 	}
7343 	/* pflip */
7344 	if (rdev->num_crtc >= 2) {
7345 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7346 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7347 	}
7348 	if (rdev->num_crtc >= 4) {
7349 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7350 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7351 	}
7352 	if (rdev->num_crtc >= 6) {
7353 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7354 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7355 	}
7356 
7357 	/* dac hotplug */
7358 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7359 
7360 	/* digital hotplug */
7361 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7362 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7363 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7364 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7365 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7366 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7367 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7368 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7369 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7370 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7371 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7372 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7374 }
7375 
7376 /**
7377  * cik_irq_init - init and enable the interrupt ring
7378  *
7379  * @rdev: radeon_device pointer
7380  *
7381  * Allocate a ring buffer for the interrupt controller,
7382  * enable the RLC, disable interrupts, enable the IH
7383  * ring buffer and enable it (CIK).
7384  * Called at device load and resume.
7385  * Returns 0 for success, errors for failure.
7386  */
7387 static int cik_irq_init(struct radeon_device *rdev)
7388 {
7389 	int ret = 0;
7390 	int rb_bufsz;
7391 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7392 
7393 	/* allocate ring */
7394 	ret = r600_ih_ring_alloc(rdev);
7395 	if (ret)
7396 		return ret;
7397 
7398 	/* disable irqs */
7399 	cik_disable_interrupts(rdev);
7400 
7401 	/* init rlc */
7402 	ret = cik_rlc_resume(rdev);
7403 	if (ret) {
7404 		r600_ih_ring_fini(rdev);
7405 		return ret;
7406 	}
7407 
7408 	/* setup interrupt control */
7409 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7410 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7411 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7412 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7413 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7414 	 */
7415 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7416 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7417 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7418 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7419 
7420 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7421 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7422 
7423 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7424 		      IH_WPTR_OVERFLOW_CLEAR |
7425 		      (rb_bufsz << 1));
7426 
7427 	if (rdev->wb.enabled)
7428 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7429 
7430 	/* set the writeback address whether it's enabled or not */
7431 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7432 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7433 
7434 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7435 
7436 	/* set rptr, wptr to 0 */
7437 	WREG32(IH_RB_RPTR, 0);
7438 	WREG32(IH_RB_WPTR, 0);
7439 
7440 	/* Default settings for IH_CNTL (disabled at first) */
7441 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7442 	/* RPTR_REARM only works if msi's are enabled */
7443 	if (rdev->msi_enabled)
7444 		ih_cntl |= RPTR_REARM;
7445 	WREG32(IH_CNTL, ih_cntl);
7446 
7447 	/* force the active interrupt state to all disabled */
7448 	cik_disable_interrupt_state(rdev);
7449 
7450 	pci_set_master(rdev->pdev);
7451 
7452 	/* enable irqs */
7453 	cik_enable_interrupts(rdev);
7454 
7455 	return ret;
7456 }
7457 
7458 /**
7459  * cik_irq_set - enable/disable interrupt sources
7460  *
7461  * @rdev: radeon_device pointer
7462  *
7463  * Enable interrupt sources on the GPU (vblanks, hpd,
7464  * etc.) (CIK).
7465  * Returns 0 for success, errors for failure.
7466  */
7467 int cik_irq_set(struct radeon_device *rdev)
7468 {
7469 	u32 cp_int_cntl;
7470 	u32 cp_m1p0;
7471 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7472 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7473 	u32 grbm_int_cntl = 0;
7474 	u32 dma_cntl, dma_cntl1;
7475 
7476 	if (!rdev->irq.installed) {
7477 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7478 		return -EINVAL;
7479 	}
7480 	/* don't enable anything if the ih is disabled */
7481 	if (!rdev->ih.enabled) {
7482 		cik_disable_interrupts(rdev);
7483 		/* force the active interrupt state to all disabled */
7484 		cik_disable_interrupt_state(rdev);
7485 		return 0;
7486 	}
7487 
7488 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7489 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7490 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7491 
7492 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7493 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7494 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7495 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7496 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7497 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7498 
7499 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7500 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7501 
7502 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7503 
7504 	/* enable CP interrupts on all rings */
7505 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7506 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7507 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7508 	}
7509 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7510 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7511 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7512 		if (ring->me == 1) {
7513 			switch (ring->pipe) {
7514 			case 0:
7515 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7516 				break;
7517 			default:
7518 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7519 				break;
7520 			}
7521 		} else {
7522 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7523 		}
7524 	}
7525 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7526 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7527 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7528 		if (ring->me == 1) {
7529 			switch (ring->pipe) {
7530 			case 0:
7531 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7532 				break;
7533 			default:
7534 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7535 				break;
7536 			}
7537 		} else {
7538 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7539 		}
7540 	}
7541 
7542 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7543 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7544 		dma_cntl |= TRAP_ENABLE;
7545 	}
7546 
7547 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7548 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7549 		dma_cntl1 |= TRAP_ENABLE;
7550 	}
7551 
7552 	if (rdev->irq.crtc_vblank_int[0] ||
7553 	    atomic_read(&rdev->irq.pflip[0])) {
7554 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7555 		crtc1 |= VBLANK_INTERRUPT_MASK;
7556 	}
7557 	if (rdev->irq.crtc_vblank_int[1] ||
7558 	    atomic_read(&rdev->irq.pflip[1])) {
7559 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7560 		crtc2 |= VBLANK_INTERRUPT_MASK;
7561 	}
7562 	if (rdev->irq.crtc_vblank_int[2] ||
7563 	    atomic_read(&rdev->irq.pflip[2])) {
7564 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7565 		crtc3 |= VBLANK_INTERRUPT_MASK;
7566 	}
7567 	if (rdev->irq.crtc_vblank_int[3] ||
7568 	    atomic_read(&rdev->irq.pflip[3])) {
7569 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7570 		crtc4 |= VBLANK_INTERRUPT_MASK;
7571 	}
7572 	if (rdev->irq.crtc_vblank_int[4] ||
7573 	    atomic_read(&rdev->irq.pflip[4])) {
7574 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7575 		crtc5 |= VBLANK_INTERRUPT_MASK;
7576 	}
7577 	if (rdev->irq.crtc_vblank_int[5] ||
7578 	    atomic_read(&rdev->irq.pflip[5])) {
7579 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7580 		crtc6 |= VBLANK_INTERRUPT_MASK;
7581 	}
7582 	if (rdev->irq.hpd[0]) {
7583 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7584 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7585 	}
7586 	if (rdev->irq.hpd[1]) {
7587 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7588 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7589 	}
7590 	if (rdev->irq.hpd[2]) {
7591 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7592 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7593 	}
7594 	if (rdev->irq.hpd[3]) {
7595 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7596 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7597 	}
7598 	if (rdev->irq.hpd[4]) {
7599 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7600 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7601 	}
7602 	if (rdev->irq.hpd[5]) {
7603 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7604 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7605 	}
7606 
7607 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7608 
7609 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7610 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7611 
7612 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7613 
7614 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7615 
7616 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7617 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7618 	if (rdev->num_crtc >= 4) {
7619 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7620 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7621 	}
7622 	if (rdev->num_crtc >= 6) {
7623 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7624 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7625 	}
7626 
7627 	if (rdev->num_crtc >= 2) {
7628 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7629 		       GRPH_PFLIP_INT_MASK);
7630 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7631 		       GRPH_PFLIP_INT_MASK);
7632 	}
7633 	if (rdev->num_crtc >= 4) {
7634 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7635 		       GRPH_PFLIP_INT_MASK);
7636 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7637 		       GRPH_PFLIP_INT_MASK);
7638 	}
7639 	if (rdev->num_crtc >= 6) {
7640 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7641 		       GRPH_PFLIP_INT_MASK);
7642 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7643 		       GRPH_PFLIP_INT_MASK);
7644 	}
7645 
7646 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7647 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7648 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7649 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7650 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7651 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7652 
7653 	/* posting read */
7654 	RREG32(SRBM_STATUS);
7655 
7656 	return 0;
7657 }
7658 
7659 /**
7660  * cik_irq_ack - ack interrupt sources
7661  *
7662  * @rdev: radeon_device pointer
7663  *
7664  * Ack interrupt sources on the GPU (vblanks, hpd,
7665  * etc.) (CIK).  Certain interrupt sources are sw
7666  * generated and do not require an explicit ack.
7667  */
7668 static inline void cik_irq_ack(struct radeon_device *rdev)
7669 {
7670 	u32 tmp;
7671 
7672 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7673 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7674 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7675 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7676 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7677 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7678 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7679 
7680 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7681 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7682 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7683 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7684 	if (rdev->num_crtc >= 4) {
7685 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7686 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7687 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7688 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7689 	}
7690 	if (rdev->num_crtc >= 6) {
7691 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7692 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7693 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7694 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7695 	}
7696 
7697 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7698 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7699 		       GRPH_PFLIP_INT_CLEAR);
7700 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7701 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7702 		       GRPH_PFLIP_INT_CLEAR);
7703 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7704 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7705 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7706 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7707 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7708 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7709 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7710 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7711 
7712 	if (rdev->num_crtc >= 4) {
7713 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7714 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7715 			       GRPH_PFLIP_INT_CLEAR);
7716 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7717 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7718 			       GRPH_PFLIP_INT_CLEAR);
7719 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7720 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7721 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7722 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7723 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7724 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7725 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7726 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7727 	}
7728 
7729 	if (rdev->num_crtc >= 6) {
7730 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7731 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7732 			       GRPH_PFLIP_INT_CLEAR);
7733 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7734 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7735 			       GRPH_PFLIP_INT_CLEAR);
7736 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7737 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7738 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7739 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7740 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7741 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7742 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7743 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7744 	}
7745 
7746 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7747 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7748 		tmp |= DC_HPDx_INT_ACK;
7749 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7750 	}
7751 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7752 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7753 		tmp |= DC_HPDx_INT_ACK;
7754 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7755 	}
7756 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7757 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7758 		tmp |= DC_HPDx_INT_ACK;
7759 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7760 	}
7761 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7762 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7763 		tmp |= DC_HPDx_INT_ACK;
7764 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7765 	}
7766 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7767 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7768 		tmp |= DC_HPDx_INT_ACK;
7769 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7770 	}
7771 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7772 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7773 		tmp |= DC_HPDx_INT_ACK;
7774 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7775 	}
7776 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7777 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7778 		tmp |= DC_HPDx_RX_INT_ACK;
7779 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7780 	}
7781 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7782 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7783 		tmp |= DC_HPDx_RX_INT_ACK;
7784 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7785 	}
7786 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7787 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7788 		tmp |= DC_HPDx_RX_INT_ACK;
7789 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7790 	}
7791 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7792 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7793 		tmp |= DC_HPDx_RX_INT_ACK;
7794 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7795 	}
7796 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7797 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7798 		tmp |= DC_HPDx_RX_INT_ACK;
7799 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7800 	}
7801 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7802 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7803 		tmp |= DC_HPDx_RX_INT_ACK;
7804 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7805 	}
7806 }
7807 
7808 /**
7809  * cik_irq_disable - disable interrupts
7810  *
7811  * @rdev: radeon_device pointer
7812  *
7813  * Disable interrupts on the hw (CIK).
7814  */
7815 static void cik_irq_disable(struct radeon_device *rdev)
7816 {
7817 	cik_disable_interrupts(rdev);
7818 	/* Wait and acknowledge irq */
7819 	mdelay(1);
7820 	cik_irq_ack(rdev);
7821 	cik_disable_interrupt_state(rdev);
7822 }
7823 
7824 /**
7825  * cik_irq_suspend - disable interrupts for suspend
7826  *
7827  * @rdev: radeon_device pointer
7828  *
7829  * Disable interrupts and stop the RLC (CIK).
7830  * Used for suspend.
7831  */
7832 static void cik_irq_suspend(struct radeon_device *rdev)
7833 {
7834 	cik_irq_disable(rdev);
7835 	cik_rlc_stop(rdev);
7836 }
7837 
7838 /**
7839  * cik_irq_fini - tear down interrupt support
7840  *
7841  * @rdev: radeon_device pointer
7842  *
7843  * Disable interrupts on the hw and free the IH ring
7844  * buffer (CIK).
7845  * Used for driver unload.
7846  */
7847 static void cik_irq_fini(struct radeon_device *rdev)
7848 {
7849 	cik_irq_suspend(rdev);
7850 	r600_ih_ring_fini(rdev);
7851 }
7852 
7853 /**
7854  * cik_get_ih_wptr - get the IH ring buffer wptr
7855  *
7856  * @rdev: radeon_device pointer
7857  *
7858  * Get the IH ring buffer wptr from either the register
7859  * or the writeback memory buffer (CIK).  Also check for
7860  * ring buffer overflow and deal with it.
7861  * Used by cik_irq_process().
7862  * Returns the value of the wptr.
7863  */
7864 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7865 {
7866 	u32 wptr, tmp;
7867 
7868 	if (rdev->wb.enabled)
7869 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7870 	else
7871 		wptr = RREG32(IH_RB_WPTR);
7872 
7873 	if (wptr & RB_OVERFLOW) {
7874 		wptr &= ~RB_OVERFLOW;
7875 		/* When a ring buffer overflow happens, start parsing interrupts
7876 		 * from the last vector that was not overwritten (wptr + 16).
7877 		 * Hopefully this allows us to catch up.
7878 		 */
7879 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7880 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7881 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7882 		tmp = RREG32(IH_RB_CNTL);
7883 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7884 		WREG32(IH_RB_CNTL, tmp);
7885 	}
7886 	return (wptr & rdev->ih.ptr_mask);
7887 }
7888 
7889 /*        CIK IV Ring
7890  * Each IV ring entry is 128 bits:
7891  * [7:0]    - interrupt source id
7892  * [31:8]   - reserved
7893  * [59:32]  - interrupt source data
7894  * [63:60]  - reserved
7895  * [71:64]  - RINGID
7896  *            CP:
7897  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7898  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7899  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7900  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7901  *            PIPE_ID - ME0 0=3D
7902  *                    - ME1&2 compute dispatcher (4 pipes each)
7903  *            SDMA:
7904  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7905  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7906  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7907  * [79:72]  - VMID
7908  * [95:80]  - PASID
7909  * [127:96] - reserved
7910  */
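/*
 * Decoding one such vector from the IH ring as four little-endian
 * dwords ("dw" is a hypothetical pointer to the entry; this matches
 * the field extraction done in cik_irq_process() below):
 *
 *	src_id   = le32_to_cpu(dw[0]) & 0xff;
 *	src_data = le32_to_cpu(dw[1]) & 0xfffffff;
 *	ring_id  = le32_to_cpu(dw[2]) & 0xff;
 *	vmid     = (le32_to_cpu(dw[2]) >> 8) & 0xff;
 *	pasid    = le32_to_cpu(dw[2]) >> 16;
 */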
7911 /**
7912  * cik_irq_process - interrupt handler
7913  *
7914  * @rdev: radeon_device pointer
7915  *
7916  * Interrupt handler (CIK).  Walk the IH ring,
7917  * ack interrupts and schedule work to handle
7918  * interrupt events.
7919  * Returns irq process return code.
7920  */
7921 int cik_irq_process(struct radeon_device *rdev)
7922 {
7923 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7924 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7925 	u32 wptr;
7926 	u32 rptr;
7927 	u32 src_id, src_data, ring_id;
7928 	u8 me_id, pipe_id, queue_id;
7929 	u32 ring_index;
7930 	bool queue_hotplug = false;
7931 	bool queue_dp = false;
7932 	bool queue_reset = false;
7933 	u32 addr, status, mc_client;
7934 	bool queue_thermal = false;
7935 
7936 	if (!rdev->ih.enabled || rdev->shutdown)
7937 		return IRQ_NONE;
7938 
7939 	wptr = cik_get_ih_wptr(rdev);
7940 
7941 restart_ih:
7942 	/* is somebody else already processing irqs? */
7943 	if (atomic_xchg(&rdev->ih.lock, 1))
7944 		return IRQ_NONE;
7945 
7946 	rptr = rdev->ih.rptr;
7947 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7948 
7949 	/* Order reading of wptr vs. reading of IH ring data */
7950 	rmb();
7951 
7952 	/* display interrupts */
7953 	cik_irq_ack(rdev);
7954 
7955 	while (rptr != wptr) {
7956 		/* wptr/rptr are in bytes! */
7957 		ring_index = rptr / 4;
7958 
7959 		radeon_kfd_interrupt(rdev,
7960 				(const void *) &rdev->ih.ring[ring_index]);
7961 
7962 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7963 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7964 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7965 
7966 		switch (src_id) {
7967 		case 1: /* D1 vblank/vline */
7968 			switch (src_data) {
7969 			case 0: /* D1 vblank */
7970 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7971 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7972 
7973 				if (rdev->irq.crtc_vblank_int[0]) {
7974 					drm_handle_vblank(rdev->ddev, 0);
7975 					rdev->pm.vblank_sync = true;
7976 					wake_up(&rdev->irq.vblank_queue);
7977 				}
7978 				if (atomic_read(&rdev->irq.pflip[0]))
7979 					radeon_crtc_handle_vblank(rdev, 0);
7980 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7981 				DRM_DEBUG("IH: D1 vblank\n");
7982 
7983 				break;
7984 			case 1: /* D1 vline */
7985 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7986 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7987 
7988 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7989 				DRM_DEBUG("IH: D1 vline\n");
7990 
7991 				break;
7992 			default:
7993 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7994 				break;
7995 			}
7996 			break;
7997 		case 2: /* D2 vblank/vline */
7998 			switch (src_data) {
7999 			case 0: /* D2 vblank */
8000 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
8001 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8002 
8003 				if (rdev->irq.crtc_vblank_int[1]) {
8004 					drm_handle_vblank(rdev->ddev, 1);
8005 					rdev->pm.vblank_sync = true;
8006 					wake_up(&rdev->irq.vblank_queue);
8007 				}
8008 				if (atomic_read(&rdev->irq.pflip[1]))
8009 					radeon_crtc_handle_vblank(rdev, 1);
8010 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
8011 				DRM_DEBUG("IH: D2 vblank\n");
8012 
8013 				break;
8014 			case 1: /* D2 vline */
8015 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
8016 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8017 
8018 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
8019 				DRM_DEBUG("IH: D2 vline\n");
8020 
8021 				break;
8022 			default:
8023 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8024 				break;
8025 			}
8026 			break;
8027 		case 3: /* D3 vblank/vline */
8028 			switch (src_data) {
8029 			case 0: /* D3 vblank */
8030 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
8031 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8032 
8033 				if (rdev->irq.crtc_vblank_int[2]) {
8034 					drm_handle_vblank(rdev->ddev, 2);
8035 					rdev->pm.vblank_sync = true;
8036 					wake_up(&rdev->irq.vblank_queue);
8037 				}
8038 				if (atomic_read(&rdev->irq.pflip[2]))
8039 					radeon_crtc_handle_vblank(rdev, 2);
8040 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8041 				DRM_DEBUG("IH: D3 vblank\n");
8042 
8043 				break;
8044 			case 1: /* D3 vline */
8045 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8046 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8047 
8048 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8049 				DRM_DEBUG("IH: D3 vline\n");
8050 
8051 				break;
8052 			default:
8053 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8054 				break;
8055 			}
8056 			break;
8057 		case 4: /* D4 vblank/vline */
8058 			switch (src_data) {
8059 			case 0: /* D4 vblank */
8060 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8061 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8062 
8063 				if (rdev->irq.crtc_vblank_int[3]) {
8064 					drm_handle_vblank(rdev->ddev, 3);
8065 					rdev->pm.vblank_sync = true;
8066 					wake_up(&rdev->irq.vblank_queue);
8067 				}
8068 				if (atomic_read(&rdev->irq.pflip[3]))
8069 					radeon_crtc_handle_vblank(rdev, 3);
8070 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8071 				DRM_DEBUG("IH: D4 vblank\n");
8072 
8073 				break;
8074 			case 1: /* D4 vline */
8075 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8076 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8077 
8078 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8079 				DRM_DEBUG("IH: D4 vline\n");
8080 
8081 				break;
8082 			default:
8083 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8084 				break;
8085 			}
8086 			break;
8087 		case 5: /* D5 vblank/vline */
8088 			switch (src_data) {
8089 			case 0: /* D5 vblank */
8090 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8091 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8092 
8093 				if (rdev->irq.crtc_vblank_int[4]) {
8094 					drm_handle_vblank(rdev->ddev, 4);
8095 					rdev->pm.vblank_sync = true;
8096 					wake_up(&rdev->irq.vblank_queue);
8097 				}
8098 				if (atomic_read(&rdev->irq.pflip[4]))
8099 					radeon_crtc_handle_vblank(rdev, 4);
8100 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8101 				DRM_DEBUG("IH: D5 vblank\n");
8102 
8103 				break;
8104 			case 1: /* D5 vline */
8105 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8106 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8107 
8108 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8109 				DRM_DEBUG("IH: D5 vline\n");
8110 
8111 				break;
8112 			default:
8113 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8114 				break;
8115 			}
8116 			break;
8117 		case 6: /* D6 vblank/vline */
8118 			switch (src_data) {
8119 			case 0: /* D6 vblank */
8120 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8121 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8122 
8123 				if (rdev->irq.crtc_vblank_int[5]) {
8124 					drm_handle_vblank(rdev->ddev, 5);
8125 					rdev->pm.vblank_sync = true;
8126 					wake_up(&rdev->irq.vblank_queue);
8127 				}
8128 				if (atomic_read(&rdev->irq.pflip[5]))
8129 					radeon_crtc_handle_vblank(rdev, 5);
8130 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8131 				DRM_DEBUG("IH: D6 vblank\n");
8132 
8133 				break;
8134 			case 1: /* D6 vline */
8135 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8136 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8137 
8138 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8139 				DRM_DEBUG("IH: D6 vline\n");
8140 
8141 				break;
8142 			default:
8143 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8144 				break;
8145 			}
8146 			break;
8147 		case 8: /* D1 page flip */
8148 		case 10: /* D2 page flip */
8149 		case 12: /* D3 page flip */
8150 		case 14: /* D4 page flip */
8151 		case 16: /* D5 page flip */
8152 		case 18: /* D6 page flip */
8153 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8154 			if (radeon_use_pflipirq > 0)
8155 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8156 			break;
8157 		case 42: /* HPD hotplug */
8158 			switch (src_data) {
8159 			case 0:
8160 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8161 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8162 
8163 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8164 				queue_hotplug = true;
8165 				DRM_DEBUG("IH: HPD1\n");
8166 
8167 				break;
8168 			case 1:
8169 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8170 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8171 
8172 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8173 				queue_hotplug = true;
8174 				DRM_DEBUG("IH: HPD2\n");
8175 
8176 				break;
8177 			case 2:
8178 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8179 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8180 
8181 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8182 				queue_hotplug = true;
8183 				DRM_DEBUG("IH: HPD3\n");
8184 
8185 				break;
8186 			case 3:
8187 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8188 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8189 
8190 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8191 				queue_hotplug = true;
8192 				DRM_DEBUG("IH: HPD4\n");
8193 
8194 				break;
8195 			case 4:
8196 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8197 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8198 
8199 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8200 				queue_hotplug = true;
8201 				DRM_DEBUG("IH: HPD5\n");
8202 
8203 				break;
8204 			case 5:
8205 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8206 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8207 
8208 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8209 				queue_hotplug = true;
8210 				DRM_DEBUG("IH: HPD6\n");
8211 
8212 				break;
8213 			case 6:
8214 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8215 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8216 
8217 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8218 				queue_dp = true;
8219 				DRM_DEBUG("IH: HPD_RX 1\n");
8220 
8221 				break;
8222 			case 7:
8223 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8224 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8225 
8226 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8227 				queue_dp = true;
8228 				DRM_DEBUG("IH: HPD_RX 2\n");
8229 
8230 				break;
8231 			case 8:
8232 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8233 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8234 
8235 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8236 				queue_dp = true;
8237 				DRM_DEBUG("IH: HPD_RX 3\n");
8238 
8239 				break;
8240 			case 9:
8241 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8242 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8243 
8244 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8245 				queue_dp = true;
8246 				DRM_DEBUG("IH: HPD_RX 4\n");
8247 
8248 				break;
8249 			case 10:
8250 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8251 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8252 
8253 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8254 				queue_dp = true;
8255 				DRM_DEBUG("IH: HPD_RX 5\n");
8256 
8257 				break;
8258 			case 11:
8259 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8260 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8261 
8262 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8263 				queue_dp = true;
8264 				DRM_DEBUG("IH: HPD_RX 6\n");
8265 
8266 				break;
8267 			default:
8268 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8269 				break;
8270 			}
8271 			break;
8272 		case 96:
8273 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8274 			WREG32(SRBM_INT_ACK, 0x1);
8275 			break;
8276 		case 124: /* UVD */
8277 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8278 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8279 			break;
8280 		case 146:
8281 		case 147:
8282 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8283 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8284 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8285 			/* reset addr and status */
8286 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8287 			if (addr == 0x0 && status == 0x0)
8288 				break;
8289 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8290 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8291 				addr);
8292 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8293 				status);
8294 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8295 			break;
8296 		case 167: /* VCE */
8297 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8298 			switch (src_data) {
8299 			case 0:
8300 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8301 				break;
8302 			case 1:
8303 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8304 				break;
8305 			default:
8306 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8307 				break;
8308 			}
8309 			break;
8310 		case 176: /* GFX RB CP_INT */
8311 		case 177: /* GFX IB CP_INT */
8312 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8313 			break;
8314 		case 181: /* CP EOP event */
8315 			DRM_DEBUG("IH: CP EOP\n");
8316 			/* XXX check the bitfield order! */
8317 			me_id = (ring_id & 0x60) >> 5;
8318 			pipe_id = (ring_id & 0x18) >> 3;
8319 			queue_id = (ring_id & 0x7) >> 0;
8320 			switch (me_id) {
8321 			case 0:
8322 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8323 				break;
8324 			case 1:
8325 			case 2:
8326 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8327 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8328 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8329 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8330 				break;
8331 			}
8332 			break;
8333 		case 184: /* CP Privileged reg access */
8334 			DRM_ERROR("Illegal register access in command stream\n");
8335 			/* XXX check the bitfield order! */
8336 			me_id = (ring_id & 0x60) >> 5;
8337 			pipe_id = (ring_id & 0x18) >> 3;
8338 			queue_id = (ring_id & 0x7) >> 0;
8339 			switch (me_id) {
8340 			case 0:
8341 				/* This results in a full GPU reset, but all we need to do is soft
8342 				 * reset the CP for gfx
8343 				 */
8344 				queue_reset = true;
8345 				break;
8346 			case 1:
8347 				/* XXX compute */
8348 				queue_reset = true;
8349 				break;
8350 			case 2:
8351 				/* XXX compute */
8352 				queue_reset = true;
8353 				break;
8354 			}
8355 			break;
8356 		case 185: /* CP Privileged inst */
8357 			DRM_ERROR("Illegal instruction in command stream\n");
8358 			/* XXX check the bitfield order! */
8359 			me_id = (ring_id & 0x60) >> 5;
8360 			pipe_id = (ring_id & 0x18) >> 3;
8361 			queue_id = (ring_id & 0x7) >> 0;
8362 			switch (me_id) {
8363 			case 0:
8364 				/* This results in a full GPU reset, but all we need to do is soft
8365 				 * reset the CP for gfx
8366 				 */
8367 				queue_reset = true;
8368 				break;
8369 			case 1:
8370 				/* XXX compute */
8371 				queue_reset = true;
8372 				break;
8373 			case 2:
8374 				/* XXX compute */
8375 				queue_reset = true;
8376 				break;
8377 			}
8378 			break;
8379 		case 224: /* SDMA trap event */
8380 			/* XXX check the bitfield order! */
8381 			me_id = (ring_id & 0x3) >> 0;
8382 			queue_id = (ring_id & 0xc) >> 2;
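			/* assumed decode: ring_id[1:0] = SDMA engine, ring_id[3:2] = queue */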
8383 			DRM_DEBUG("IH: SDMA trap\n");
8384 			switch (me_id) {
8385 			case 0:
8386 				switch (queue_id) {
8387 				case 0:
8388 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8389 					break;
8390 				case 1:
8391 					/* XXX compute */
8392 					break;
8393 				case 2:
8394 					/* XXX compute */
8395 					break;
8396 				}
8397 				break;
8398 			case 1:
8399 				switch (queue_id) {
8400 				case 0:
8401 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8402 					break;
8403 				case 1:
8404 					/* XXX compute */
8405 					break;
8406 				case 2:
8407 					/* XXX compute */
8408 					break;
8409 				}
8410 				break;
8411 			}
8412 			break;
8413 		case 230: /* thermal low to high */
8414 			DRM_DEBUG("IH: thermal low to high\n");
8415 			rdev->pm.dpm.thermal.high_to_low = false;
8416 			queue_thermal = true;
8417 			break;
8418 		case 231: /* thermal high to low */
8419 			DRM_DEBUG("IH: thermal high to low\n");
8420 			rdev->pm.dpm.thermal.high_to_low = true;
8421 			queue_thermal = true;
8422 			break;
8423 		case 233: /* GUI IDLE */
8424 			DRM_DEBUG("IH: GUI idle\n");
8425 			break;
8426 		case 241: /* SDMA Privileged inst */
8427 		case 247: /* SDMA Privileged inst */
8428 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8429 			/* XXX check the bitfield order! */
8430 			me_id = (ring_id & 0x3) >> 0;
8431 			queue_id = (ring_id & 0xc) >> 2;
8432 			switch (me_id) {
8433 			case 0:
8434 				switch (queue_id) {
8435 				case 0:
8436 					queue_reset = true;
8437 					break;
8438 				case 1:
8439 					/* XXX compute */
8440 					queue_reset = true;
8441 					break;
8442 				case 2:
8443 					/* XXX compute */
8444 					queue_reset = true;
8445 					break;
8446 				}
8447 				break;
8448 			case 1:
8449 				switch (queue_id) {
8450 				case 0:
8451 					queue_reset = true;
8452 					break;
8453 				case 1:
8454 					/* XXX compute */
8455 					queue_reset = true;
8456 					break;
8457 				case 2:
8458 					/* XXX compute */
8459 					queue_reset = true;
8460 					break;
8461 				}
8462 				break;
8463 			}
8464 			break;
8465 		default:
8466 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8467 			break;
8468 		}
8469 
8470 		/* wptr/rptr are in bytes! */
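		/* each IH ring entry is 16 bytes (four dwords), hence the stride */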
8471 		rptr += 16;
8472 		rptr &= rdev->ih.ptr_mask;
8473 		WREG32(IH_RB_RPTR, rptr);
8474 	}
8475 	if (queue_dp)
8476 		schedule_work(&rdev->dp_work);
8477 	if (queue_hotplug)
8478 		schedule_delayed_work(&rdev->hotplug_work, 0);
8479 	if (queue_reset) {
8480 		rdev->needs_reset = true;
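		/* wake fence waiters so the pending reset gets noticed */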
8481 		wake_up_all(&rdev->fence_queue);
8482 	}
8483 	if (queue_thermal)
8484 		schedule_work(&rdev->pm.dpm.thermal.work);
8485 	rdev->ih.rptr = rptr;
8486 	atomic_set(&rdev->ih.lock, 0);
8487 
8488 	/* make sure wptr hasn't changed while processing */
8489 	wptr = cik_get_ih_wptr(rdev);
8490 	if (wptr != rptr)
8491 		goto restart_ih;
8492 
8493 	return IRQ_HANDLED;
8494 }
8495 
8496 /*
8497  * startup/shutdown callbacks
8498  */
8499 /**
8500  * cik_startup - program the asic to a functional state
8501  *
8502  * @rdev: radeon_device pointer
8503  *
8504  * Programs the asic to a functional state (CIK).
8505  * Called by cik_init() and cik_resume().
8506  * Returns 0 for success, error for failure.
8507  */
8508 static int cik_startup(struct radeon_device *rdev)
8509 {
8510 	struct radeon_ring *ring;
8511 	u32 nop;
8512 	int r;
8513 
8514 	/* enable pcie gen2/3 link */
8515 	cik_pcie_gen3_enable(rdev);
8516 	/* enable aspm */
8517 	cik_program_aspm(rdev);
8518 
8519 	/* scratch needs to be initialized before MC */
8520 	r = r600_vram_scratch_init(rdev);
8521 	if (r)
8522 		return r;
8523 
8524 	cik_mc_program(rdev);
8525 
8526 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8527 		r = ci_mc_load_microcode(rdev);
8528 		if (r) {
8529 			DRM_ERROR("Failed to load MC firmware!\n");
8530 			return r;
8531 		}
8532 	}
8533 
8534 	r = cik_pcie_gart_enable(rdev);
8535 	if (r)
8536 		return r;
8537 	cik_gpu_init(rdev);
8538 
8539 	/* allocate rlc buffers */
8540 	if (rdev->flags & RADEON_IS_IGP) {
8541 		if (rdev->family == CHIP_KAVERI) {
8542 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8543 			rdev->rlc.reg_list_size =
8544 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8545 		} else {
8546 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8547 			rdev->rlc.reg_list_size =
8548 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8549 		}
8550 	}
8551 	rdev->rlc.cs_data = ci_cs_data;
8552 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8553 	r = sumo_rlc_init(rdev);
8554 	if (r) {
8555 		DRM_ERROR("Failed to init rlc BOs!\n");
8556 		return r;
8557 	}
8558 
8559 	/* allocate wb buffer */
8560 	r = radeon_wb_init(rdev);
8561 	if (r)
8562 		return r;
8563 
8564 	/* allocate mec buffers */
8565 	r = cik_mec_init(rdev);
8566 	if (r) {
8567 		DRM_ERROR("Failed to init MEC BOs!\n");
8568 		return r;
8569 	}
8570 
8571 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8572 	if (r) {
8573 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8574 		return r;
8575 	}
8576 
8577 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8578 	if (r) {
8579 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8580 		return r;
8581 	}
8582 
8583 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8584 	if (r) {
8585 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8586 		return r;
8587 	}
8588 
8589 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8590 	if (r) {
8591 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8592 		return r;
8593 	}
8594 
8595 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8596 	if (r) {
8597 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8598 		return r;
8599 	}
8600 
8601 	r = radeon_uvd_resume(rdev);
8602 	if (!r) {
8603 		r = uvd_v4_2_resume(rdev);
8604 		if (!r) {
8605 			r = radeon_fence_driver_start_ring(rdev,
8606 							   R600_RING_TYPE_UVD_INDEX);
8607 			if (r)
8608 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8609 		}
8610 	}
8611 	if (r)
8612 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8613 
8614 	r = radeon_vce_resume(rdev);
8615 	if (!r) {
8616 		r = vce_v2_0_resume(rdev);
8617 		if (!r)
8618 			r = radeon_fence_driver_start_ring(rdev,
8619 							   TN_RING_TYPE_VCE1_INDEX);
8620 		if (!r)
8621 			r = radeon_fence_driver_start_ring(rdev,
8622 							   TN_RING_TYPE_VCE2_INDEX);
8623 	}
8624 	if (r) {
8625 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8626 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8627 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8628 	}
8629 
8630 	/* Enable IRQ */
8631 	if (!rdev->irq.installed) {
8632 		r = radeon_irq_kms_init(rdev);
8633 		if (r)
8634 			return r;
8635 	}
8636 
8637 	r = cik_irq_init(rdev);
8638 	if (r) {
8639 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8640 		radeon_irq_kms_fini(rdev);
8641 		return r;
8642 	}
8643 	cik_irq_set(rdev);
8644 
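	/* nop packet selection: older Hawaii CP firmware does not handle
	 * type-3 NOP padding correctly (assumption based on the new_fw
	 * check below), so fall back to type-2 packets there; everything
	 * else uses the single-dword type-3 NOP (count field 0x3FFF)
	 */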
8645 	if (rdev->family == CHIP_HAWAII) {
8646 		if (rdev->new_fw)
8647 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8648 		else
8649 			nop = RADEON_CP_PACKET2;
8650 	} else {
8651 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8652 	}
8653 
8654 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8655 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8656 			     nop);
8657 	if (r)
8658 		return r;
8659 
8660 	/* set up the compute queues */
8661 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8662 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8663 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8664 			     nop);
8665 	if (r)
8666 		return r;
8667 	ring->me = 1; /* first MEC */
8668 	ring->pipe = 0; /* first pipe */
8669 	ring->queue = 0; /* first queue */
8670 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8671 
8672 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8673 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8674 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8675 			     nop);
8676 	if (r)
8677 		return r;
8678 	/* dGPUs only have 1 MEC */
8679 	ring->me = 1; /* first MEC */
8680 	ring->pipe = 0; /* first pipe */
8681 	ring->queue = 1; /* second queue */
8682 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8683 
8684 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8685 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8686 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8687 	if (r)
8688 		return r;
8689 
8690 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8691 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8692 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8693 	if (r)
8694 		return r;
8695 
8696 	r = cik_cp_resume(rdev);
8697 	if (r)
8698 		return r;
8699 
8700 	r = cik_sdma_resume(rdev);
8701 	if (r)
8702 		return r;
8703 
8704 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8705 	if (ring->ring_size) {
8706 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8707 				     RADEON_CP_PACKET2);
8708 		if (!r)
8709 			r = uvd_v1_0_init(rdev);
8710 		if (r)
8711 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8712 	}
8713 
8714 	r = -ENOENT;
8715 
8716 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8717 	if (ring->ring_size)
8718 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8719 				     VCE_CMD_NO_OP);
8720 
8721 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8722 	if (ring->ring_size)
8723 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8724 				     VCE_CMD_NO_OP);
8725 
8726 	if (!r)
8727 		r = vce_v1_0_init(rdev);
8728 	else if (r != -ENOENT)
8729 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8730 
8731 	r = radeon_ib_pool_init(rdev);
8732 	if (r) {
8733 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8734 		return r;
8735 	}
8736 
8737 	r = radeon_vm_manager_init(rdev);
8738 	if (r) {
8739 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8740 		return r;
8741 	}
8742 
8743 	r = radeon_audio_init(rdev);
8744 	if (r)
8745 		return r;
8746 
8747 	r = radeon_kfd_resume(rdev);
8748 	if (r)
8749 		return r;
8750 
8751 	return 0;
8752 }
8753 
8754 /**
8755  * cik_resume - resume the asic to a functional state
8756  *
8757  * @rdev: radeon_device pointer
8758  *
8759  * Programs the asic to a functional state (CIK).
8760  * Called at resume.
8761  * Returns 0 for success, error for failure.
8762  */
8763 int cik_resume(struct radeon_device *rdev)
8764 {
8765 	int r;
8766 
8767 	/* post card */
8768 	atom_asic_init(rdev->mode_info.atom_context);
8769 
8770 	/* init golden registers */
8771 	cik_init_golden_registers(rdev);
8772 
8773 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8774 		radeon_pm_resume(rdev);
8775 
8776 	rdev->accel_working = true;
8777 	r = cik_startup(rdev);
8778 	if (r) {
8779 		DRM_ERROR("cik startup failed on resume\n");
8780 		rdev->accel_working = false;
8781 		return r;
8782 	}
8783 
8784 	return r;
8785 
8786 }
8787 
8788 /**
8789  * cik_suspend - suspend the asic
8790  *
8791  * @rdev: radeon_device pointer
8792  *
8793  * Bring the chip into a state suitable for suspend (CIK).
8794  * Called at suspend.
8795  * Returns 0 for success.
8796  */
8797 int cik_suspend(struct radeon_device *rdev)
8798 {
8799 	radeon_kfd_suspend(rdev);
8800 	radeon_pm_suspend(rdev);
8801 	radeon_audio_fini(rdev);
8802 	radeon_vm_manager_fini(rdev);
8803 	cik_cp_enable(rdev, false);
8804 	cik_sdma_enable(rdev, false);
8805 	uvd_v1_0_fini(rdev);
8806 	radeon_uvd_suspend(rdev);
8807 	radeon_vce_suspend(rdev);
8808 	cik_fini_pg(rdev);
8809 	cik_fini_cg(rdev);
8810 	cik_irq_suspend(rdev);
8811 	radeon_wb_disable(rdev);
8812 	cik_pcie_gart_disable(rdev);
8813 	return 0;
8814 }
8815 
8816 /* The plan is to move initialization into this function and use
8817  * helper functions so that radeon_device_init does pretty much
8818  * nothing more than call asic specific functions. This
8819  * should also allow us to remove a bunch of callback functions
8820  * like vram_info.
8821  */
8822 /**
8823  * cik_init - asic specific driver and hw init
8824  *
8825  * @rdev: radeon_device pointer
8826  *
8827  * Setup asic specific driver variables and program the hw
8828  * to a functional state (CIK).
8829  * Called at driver startup.
8830  * Returns 0 for success, errors for failure.
8831  */
8832 int cik_init(struct radeon_device *rdev)
8833 {
8834 	struct radeon_ring *ring;
8835 	int r;
8836 
8837 	/* Read BIOS */
8838 	if (!radeon_get_bios(rdev)) {
8839 		if (ASIC_IS_AVIVO(rdev))
8840 			return -EINVAL;
8841 	}
8842 	/* Must be an ATOMBIOS */
8843 	if (!rdev->is_atom_bios) {
8844 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8845 		return -EINVAL;
8846 	}
8847 	r = radeon_atombios_init(rdev);
8848 	if (r)
8849 		return r;
8850 
8851 	/* Post card if necessary */
8852 	if (!radeon_card_posted(rdev)) {
8853 		if (!rdev->bios) {
8854 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8855 			return -EINVAL;
8856 		}
8857 		DRM_INFO("GPU not posted. posting now...\n");
8858 		atom_asic_init(rdev->mode_info.atom_context);
8859 	}
8860 	/* init golden registers */
8861 	cik_init_golden_registers(rdev);
8862 	/* Initialize scratch registers */
8863 	cik_scratch_init(rdev);
8864 	/* Initialize surface registers */
8865 	radeon_surface_init(rdev);
8866 	/* Initialize clocks */
8867 	radeon_get_clock_info(rdev->ddev);
8868 
8869 	/* Fence driver */
8870 	r = radeon_fence_driver_init(rdev);
8871 	if (r)
8872 		return r;
8873 
8874 	/* initialize memory controller */
8875 	r = cik_mc_init(rdev);
8876 	if (r)
8877 		return r;
8878 	/* Memory manager */
8879 	r = radeon_bo_init(rdev);
8880 	if (r)
8881 		return r;
8882 
8883 	if (rdev->flags & RADEON_IS_IGP) {
8884 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8885 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8886 			r = cik_init_microcode(rdev);
8887 			if (r) {
8888 				DRM_ERROR("Failed to load firmware!\n");
8889 				return r;
8890 			}
8891 		}
8892 	} else {
8893 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8894 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8895 		    !rdev->mc_fw) {
8896 			r = cik_init_microcode(rdev);
8897 			if (r) {
8898 				DRM_ERROR("Failed to load firmware!\n");
8899 				return r;
8900 			}
8901 		}
8902 	}
8903 
8904 	/* Initialize power management */
8905 	radeon_pm_init(rdev);
8906 
8907 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8908 	ring->ring_obj = NULL;
8909 	r600_ring_init(rdev, ring, 1024 * 1024);
8910 
8911 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8912 	ring->ring_obj = NULL;
8913 	r600_ring_init(rdev, ring, 1024 * 1024);
8914 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8915 	if (r)
8916 		return r;
8917 
8918 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8919 	ring->ring_obj = NULL;
8920 	r600_ring_init(rdev, ring, 1024 * 1024);
8921 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8922 	if (r)
8923 		return r;
8924 
8925 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8926 	ring->ring_obj = NULL;
8927 	r600_ring_init(rdev, ring, 256 * 1024);
8928 
8929 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8930 	ring->ring_obj = NULL;
8931 	r600_ring_init(rdev, ring, 256 * 1024);
8932 
8933 	r = radeon_uvd_init(rdev);
8934 	if (!r) {
8935 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8936 		ring->ring_obj = NULL;
8937 		r600_ring_init(rdev, ring, 4096);
8938 	}
8939 
8940 	r = radeon_vce_init(rdev);
8941 	if (!r) {
8942 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8943 		ring->ring_obj = NULL;
8944 		r600_ring_init(rdev, ring, 4096);
8945 
8946 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8947 		ring->ring_obj = NULL;
8948 		r600_ring_init(rdev, ring, 4096);
8949 	}
8950 
8951 	rdev->ih.ring_obj = NULL;
8952 	r600_ih_ring_init(rdev, 64 * 1024);
8953 
8954 	r = r600_pcie_gart_init(rdev);
8955 	if (r)
8956 		return r;
8957 
8958 	rdev->accel_working = true;
8959 	r = cik_startup(rdev);
8960 	if (r) {
8961 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8962 		cik_cp_fini(rdev);
8963 		cik_sdma_fini(rdev);
8964 		cik_irq_fini(rdev);
8965 		sumo_rlc_fini(rdev);
8966 		cik_mec_fini(rdev);
8967 		radeon_wb_fini(rdev);
8968 		radeon_ib_pool_fini(rdev);
8969 		radeon_vm_manager_fini(rdev);
8970 		radeon_irq_kms_fini(rdev);
8971 		cik_pcie_gart_fini(rdev);
8972 		rdev->accel_working = false;
8973 	}
8974 
8975 	/* Don't start up if the MC ucode is missing.
8976 	 * The default clocks and voltages before the MC ucode
8977 	 * is loaded are not sufficient for advanced operations.
8978 	 */
8979 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8980 		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8981 		return -EINVAL;
8982 	}
8983 
8984 	return 0;
8985 }
8986 
8987 /**
8988  * cik_fini - asic specific driver and hw fini
8989  *
8990  * @rdev: radeon_device pointer
8991  *
8992  * Tear down the asic specific driver variables and program the hw
8993  * to an idle state (CIK).
8994  * Called at driver unload.
8995  */
8996 void cik_fini(struct radeon_device *rdev)
8997 {
8998 	radeon_pm_fini(rdev);
8999 	cik_cp_fini(rdev);
9000 	cik_sdma_fini(rdev);
9001 	cik_fini_pg(rdev);
9002 	cik_fini_cg(rdev);
9003 	cik_irq_fini(rdev);
9004 	sumo_rlc_fini(rdev);
9005 	cik_mec_fini(rdev);
9006 	radeon_wb_fini(rdev);
9007 	radeon_vm_manager_fini(rdev);
9008 	radeon_ib_pool_fini(rdev);
9009 	radeon_irq_kms_fini(rdev);
9010 	uvd_v1_0_fini(rdev);
9011 	radeon_uvd_fini(rdev);
9012 	radeon_vce_fini(rdev);
9013 	cik_pcie_gart_fini(rdev);
9014 	r600_vram_scratch_fini(rdev);
9015 	radeon_gem_fini(rdev);
9016 	radeon_fence_driver_fini(rdev);
9017 	radeon_bo_fini(rdev);
9018 	radeon_atombios_fini(rdev);
9019 	kfree(rdev->bios);
9020 	rdev->bios = NULL;
9021 }
9022 
9023 void dce8_program_fmt(struct drm_encoder *encoder)
9024 {
9025 	struct drm_device *dev = encoder->dev;
9026 	struct radeon_device *rdev = dev->dev_private;
9027 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
9028 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
9029 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
9030 	int bpc = 0;
9031 	u32 tmp = 0;
9032 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
9033 
9034 	if (connector) {
9035 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9036 		bpc = radeon_get_monitor_bpc(connector);
9037 		dither = radeon_connector->dither;
9038 	}
9039 
9040 	/* LVDS/eDP FMT is set up by atom */
9041 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9042 		return;
9043 
9044 	/* not needed for analog */
9045 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9046 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9047 		return;
9048 
9049 	if (bpc == 0)
9050 		return;
9051 
9052 	switch (bpc) {
9053 	case 6:
9054 		if (dither == RADEON_FMT_DITHER_ENABLE)
9055 			/* XXX sort out optimal dither settings */
9056 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9057 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9058 		else
9059 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9060 		break;
9061 	case 8:
9062 		if (dither == RADEON_FMT_DITHER_ENABLE)
9063 			/* XXX sort out optimal dither settings */
9064 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9065 				FMT_RGB_RANDOM_ENABLE |
9066 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9067 		else
9068 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9069 		break;
9070 	case 10:
9071 		if (dither == RADEON_FMT_DITHER_ENABLE)
9072 			/* XXX sort out optimal dither settings */
9073 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9074 				FMT_RGB_RANDOM_ENABLE |
9075 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9076 		else
9077 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9078 		break;
9079 	default:
9080 		/* not needed */
9081 		break;
9082 	}
9083 
9084 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9085 }
9086 
9087 /* display watermark setup */
9088 /**
9089  * dce8_line_buffer_adjust - Set up the line buffer
9090  *
9091  * @rdev: radeon_device pointer
9092  * @radeon_crtc: the selected display controller
9093  * @mode: the current display mode on the selected display
9094  * controller
9095  *
9096  * Set up the line buffer allocation for
9097  * the selected display controller (CIK).
9098  * Returns the line buffer size in pixels.
9099  */
9100 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9101 				   struct radeon_crtc *radeon_crtc,
9102 				   struct drm_display_mode *mode)
9103 {
9104 	u32 tmp, buffer_alloc, i;
9105 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9106 	/*
9107 	 * Line Buffer Setup
9108 	 * There are 6 line buffers, one for each display controller.
9109 	 * There are 3 partitions per LB. Select the number of partitions
9110 	 * to enable based on the display width.  For display widths larger
9111 	 * than 4096, you need to use 2 display controllers and combine
9112 	 * them using the stereo blender.
9113 	 */
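	/* e.g. a 1920 pixel wide (1080p) mode is not < 1920, so it takes
	 * the second bucket below (tmp = 2) and the function returns
	 * 2560 * 2 pixels of line buffer
	 */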
9114 	if (radeon_crtc->base.enabled && mode) {
9115 		if (mode->crtc_hdisplay < 1920) {
9116 			tmp = 1;
9117 			buffer_alloc = 2;
9118 		} else if (mode->crtc_hdisplay < 2560) {
9119 			tmp = 2;
9120 			buffer_alloc = 2;
9121 		} else if (mode->crtc_hdisplay < 4096) {
9122 			tmp = 0;
9123 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9124 		} else {
9125 			DRM_DEBUG_KMS("Mode too big for LB!\n");
9126 			tmp = 0;
9127 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9128 		}
9129 	} else {
9130 		tmp = 1;
9131 		buffer_alloc = 0;
9132 	}
9133 
9134 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9135 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9136 
9137 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9138 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9139 	for (i = 0; i < rdev->usec_timeout; i++) {
9140 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9141 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9142 			break;
9143 		udelay(1);
9144 	}
9145 
9146 	if (radeon_crtc->base.enabled && mode) {
9147 		switch (tmp) {
9148 		case 0:
9149 		default:
9150 			return 4096 * 2;
9151 		case 1:
9152 			return 1920 * 2;
9153 		case 2:
9154 			return 2560 * 2;
9155 		}
9156 	}
9157 
9158 	/* controller not enabled, so no lb used */
9159 	return 0;
9160 }
9161 
9162 /**
9163  * cik_get_number_of_dram_channels - get the number of dram channels
9164  *
9165  * @rdev: radeon_device pointer
9166  *
9167  * Look up the number of video ram channels (CIK).
9168  * Used for display watermark bandwidth calculations
9169  * Returns the number of dram channels
9170  */
9171 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9172 {
9173 	u32 tmp = RREG32(MC_SHARED_CHMAP);
9174 
9175 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9176 	case 0:
9177 	default:
9178 		return 1;
9179 	case 1:
9180 		return 2;
9181 	case 2:
9182 		return 4;
9183 	case 3:
9184 		return 8;
9185 	case 4:
9186 		return 3;
9187 	case 5:
9188 		return 6;
9189 	case 6:
9190 		return 10;
9191 	case 7:
9192 		return 12;
9193 	case 8:
9194 		return 16;
9195 	}
9196 }
9197 
9198 struct dce8_wm_params {
9199 	u32 dram_channels; /* number of dram channels */
9200 	u32 yclk;          /* bandwidth per dram data pin in kHz */
9201 	u32 sclk;          /* engine clock in kHz */
9202 	u32 disp_clk;      /* display clock in kHz */
9203 	u32 src_width;     /* viewport width */
9204 	u32 active_time;   /* active display time in ns */
9205 	u32 blank_time;    /* blank time in ns */
9206 	bool interlaced;    /* mode is interlaced */
9207 	fixed20_12 vsc;    /* vertical scale ratio */
9208 	u32 num_heads;     /* number of active crtcs */
9209 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9210 	u32 lb_size;       /* line buffer allocated to pipe */
9211 	u32 vtaps;         /* vertical scaler taps */
9212 };
9213 
9214 /**
9215  * dce8_dram_bandwidth - get the dram bandwidth
9216  *
9217  * @wm: watermark calculation data
9218  *
9219  * Calculate the raw dram bandwidth (CIK).
9220  * Used for display watermark bandwidth calculations
9221  * Returns the dram bandwidth in MBytes/s
9222  */
9223 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9224 {
9225 	/* Calculate raw DRAM Bandwidth */
9226 	fixed20_12 dram_efficiency; /* 0.7 */
9227 	fixed20_12 yclk, dram_channels, bandwidth;
9228 	fixed20_12 a;
9229 
9230 	a.full = dfixed_const(1000);
9231 	yclk.full = dfixed_const(wm->yclk);
9232 	yclk.full = dfixed_div(yclk, a);
9233 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9234 	a.full = dfixed_const(10);
9235 	dram_efficiency.full = dfixed_const(7);
9236 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9237 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9238 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
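	/* worked example: yclk = 1,000,000 (kHz) with 4 dram channels gives
	 * 1000 * (4 * 4) * 0.7 = 11200 MB/s
	 */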
9239 
9240 	return dfixed_trunc(bandwidth);
9241 }
9242 
9243 /**
9244  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9245  *
9246  * @wm: watermark calculation data
9247  *
9248  * Calculate the dram bandwidth used for display (CIK).
9249  * Used for display watermark bandwidth calculations
9250  * Returns the dram bandwidth for display in MBytes/s
9251  */
9252 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9253 {
9254 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9255 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9256 	fixed20_12 yclk, dram_channels, bandwidth;
9257 	fixed20_12 a;
9258 
9259 	a.full = dfixed_const(1000);
9260 	yclk.full = dfixed_const(wm->yclk);
9261 	yclk.full = dfixed_div(yclk, a);
9262 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9263 	a.full = dfixed_const(10);
9264 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9265 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9266 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9267 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9268 
9269 	return dfixed_trunc(bandwidth);
9270 }
9271 
9272 /**
9273  * dce8_data_return_bandwidth - get the data return bandwidth
9274  *
9275  * @wm: watermark calculation data
9276  *
9277  * Calculate the data return bandwidth used for display (CIK).
9278  * Used for display watermark bandwidth calculations
9279  * Returns the data return bandwidth in MBytes/s
9280  */
9281 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9282 {
9283 	/* Calculate the display Data return Bandwidth */
9284 	fixed20_12 return_efficiency; /* 0.8 */
9285 	fixed20_12 sclk, bandwidth;
9286 	fixed20_12 a;
9287 
9288 	a.full = dfixed_const(1000);
9289 	sclk.full = dfixed_const(wm->sclk);
9290 	sclk.full = dfixed_div(sclk, a);
9291 	a.full = dfixed_const(10);
9292 	return_efficiency.full = dfixed_const(8);
9293 	return_efficiency.full = dfixed_div(return_efficiency, a);
9294 	a.full = dfixed_const(32);
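	/* the data return path is assumed to move 32 bytes per sclk cycle */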
9295 	bandwidth.full = dfixed_mul(a, sclk);
9296 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9297 
9298 	return dfixed_trunc(bandwidth);
9299 }
9300 
9301 /**
9302  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9303  *
9304  * @wm: watermark calculation data
9305  *
9306  * Calculate the dmif bandwidth used for display (CIK).
9307  * Used for display watermark bandwidth calculations
9308  * Returns the dmif bandwidth in MBytes/s
9309  */
9310 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9311 {
9312 	/* Calculate the DMIF Request Bandwidth */
9313 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9314 	fixed20_12 disp_clk, bandwidth;
9315 	fixed20_12 a, b;
9316 
9317 	a.full = dfixed_const(1000);
9318 	disp_clk.full = dfixed_const(wm->disp_clk);
9319 	disp_clk.full = dfixed_div(disp_clk, a);
9320 	a.full = dfixed_const(32);
9321 	b.full = dfixed_mul(a, disp_clk);
9322 
9323 	a.full = dfixed_const(10);
9324 	disp_clk_request_efficiency.full = dfixed_const(8);
9325 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9326 
9327 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9328 
9329 	return dfixed_trunc(bandwidth);
9330 }
9331 
9332 /**
9333  * dce8_available_bandwidth - get the min available bandwidth
9334  *
9335  * @wm: watermark calculation data
9336  *
9337  * Calculate the min available bandwidth used for display (CIK).
9338  * Used for display watermark bandwidth calculations
9339  * Returns the min available bandwidth in MBytes/s
9340  */
9341 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9342 {
9343 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9344 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9345 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9346 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9347 
9348 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9349 }
9350 
9351 /**
9352  * dce8_average_bandwidth - get the average available bandwidth
9353  *
9354  * @wm: watermark calculation data
9355  *
9356  * Calculate the average available bandwidth used for display (CIK).
9357  * Used for display watermark bandwidth calculations
9358  * Returns the average available bandwidth in MBytes/s
9359  */
9360 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9361 {
9362 	/* Calculate the display mode Average Bandwidth
9363 	 * DisplayMode should contain the source and destination dimensions,
9364 	 * timing, etc.
9365 	 */
9366 	fixed20_12 bpp;
9367 	fixed20_12 line_time;
9368 	fixed20_12 src_width;
9369 	fixed20_12 bandwidth;
9370 	fixed20_12 a;
9371 
9372 	a.full = dfixed_const(1000);
9373 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9374 	line_time.full = dfixed_div(line_time, a);
9375 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9376 	src_width.full = dfixed_const(wm->src_width);
9377 	bandwidth.full = dfixed_mul(src_width, bpp);
9378 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9379 	bandwidth.full = dfixed_div(bandwidth, line_time);
9380 
9381 	return dfixed_trunc(bandwidth);
9382 }
9383 
9384 /**
9385  * dce8_latency_watermark - get the latency watermark
9386  *
9387  * @wm: watermark calculation data
9388  *
9389  * Calculate the latency watermark (CIK).
9390  * Used for display watermark bandwidth calculations
9391  * Returns the latency watermark in ns
9392  */
9393 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9394 {
9395 	/* First calculate the latency in ns */
9396 	u32 mc_latency = 2000; /* 2000 ns. */
9397 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9398 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9399 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9400 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9401 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9402 		(wm->num_heads * cursor_line_pair_return_time);
9403 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9404 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9405 	u32 tmp, dmif_size = 12288;
9406 	fixed20_12 a, b, c;
9407 
9408 	if (wm->num_heads == 0)
9409 		return 0;
9410 
9411 	a.full = dfixed_const(2);
9412 	b.full = dfixed_const(1);
9413 	if ((wm->vsc.full > a.full) ||
9414 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9415 	    (wm->vtaps >= 5) ||
9416 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9417 		max_src_lines_per_dst_line = 4;
9418 	else
9419 		max_src_lines_per_dst_line = 2;
9420 
9421 	a.full = dfixed_const(available_bandwidth);
9422 	b.full = dfixed_const(wm->num_heads);
9423 	a.full = dfixed_div(a, b);
9424 
9425 	b.full = dfixed_const(mc_latency + 512);
9426 	c.full = dfixed_const(wm->disp_clk);
9427 	b.full = dfixed_div(b, c);
9428 
9429 	c.full = dfixed_const(dmif_size);
9430 	b.full = dfixed_div(c, b);
9431 
9432 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9433 
9434 	b.full = dfixed_const(1000);
9435 	c.full = dfixed_const(wm->disp_clk);
9436 	b.full = dfixed_div(c, b);
9437 	c.full = dfixed_const(wm->bytes_per_pixel);
9438 	b.full = dfixed_mul(b, c);
9439 
9440 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9441 
9442 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9443 	b.full = dfixed_const(1000);
9444 	c.full = dfixed_const(lb_fill_bw);
9445 	b.full = dfixed_div(c, b);
9446 	a.full = dfixed_div(a, b);
9447 	line_fill_time = dfixed_trunc(a);
9448 
9449 	if (line_fill_time < wm->active_time)
9450 		return latency;
9451 	else
9452 		return latency + (line_fill_time - wm->active_time);
9453 
9454 }
9455 
9456 /**
9457  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9458  * average and available dram bandwidth
9459  *
9460  * @wm: watermark calculation data
9461  *
9462  * Check if the display average bandwidth fits in the display
9463  * dram bandwidth (CIK).
9464  * Used for display watermark bandwidth calculations
9465  * Returns true if the display fits, false if not.
9466  */
9467 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9468 {
9469 	if (dce8_average_bandwidth(wm) <=
9470 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9471 		return true;
9472 	else
9473 		return false;
9474 }
9475 
9476 /**
9477  * dce8_average_bandwidth_vs_available_bandwidth - check
9478  * average and available bandwidth
9479  *
9480  * @wm: watermark calculation data
9481  *
9482  * Check if the display average bandwidth fits in the display
9483  * available bandwidth (CIK).
9484  * Used for display watermark bandwidth calculations
9485  * Returns true if the display fits, false if not.
9486  */
9487 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9488 {
9489 	if (dce8_average_bandwidth(wm) <=
9490 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9491 		return true;
9492 	else
9493 		return false;
9494 }
9495 
9496 /**
9497  * dce8_check_latency_hiding - check latency hiding
9498  *
9499  * @wm: watermark calculation data
9500  *
9501  * Check latency hiding (CIK).
9502  * Used for display watermark bandwidth calculations
9503  * Returns true if the display fits, false if not.
9504  */
9505 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9506 {
9507 	u32 lb_partitions = wm->lb_size / wm->src_width;
9508 	u32 line_time = wm->active_time + wm->blank_time;
9509 	u32 latency_tolerant_lines;
9510 	u32 latency_hiding;
9511 	fixed20_12 a;
9512 
9513 	a.full = dfixed_const(1);
9514 	if (wm->vsc.full > a.full)
9515 		latency_tolerant_lines = 1;
9516 	else {
9517 		if (lb_partitions <= (wm->vtaps + 1))
9518 			latency_tolerant_lines = 1;
9519 		else
9520 			latency_tolerant_lines = 2;
9521 	}
9522 
9523 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9524 
9525 	if (dce8_latency_watermark(wm) <= latency_hiding)
9526 		return true;
9527 	else
9528 		return false;
9529 }
9530 
9531 /**
9532  * dce8_program_watermarks - program display watermarks
9533  *
9534  * @rdev: radeon_device pointer
9535  * @radeon_crtc: the selected display controller
9536  * @lb_size: line buffer size
9537  * @num_heads: number of display controllers in use
9538  *
9539  * Calculate and program the display watermarks for the
9540  * selected display controller (CIK).
9541  */
9542 static void dce8_program_watermarks(struct radeon_device *rdev,
9543 				    struct radeon_crtc *radeon_crtc,
9544 				    u32 lb_size, u32 num_heads)
9545 {
9546 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9547 	struct dce8_wm_params wm_low, wm_high;
9548 	u32 pixel_period;
9549 	u32 line_time = 0;
9550 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9551 	u32 tmp, wm_mask;
9552 
9553 	if (radeon_crtc->base.enabled && num_heads && mode) {
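		/* mode->clock is in kHz, so 10^6 / clock yields the pixel period in ns */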
9554 		pixel_period = 1000000 / (u32)mode->clock;
9555 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9556 
9557 		/* watermark for high clocks */
9558 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9559 		    rdev->pm.dpm_enabled) {
9560 			wm_high.yclk =
9561 				radeon_dpm_get_mclk(rdev, false) * 10;
9562 			wm_high.sclk =
9563 				radeon_dpm_get_sclk(rdev, false) * 10;
9564 		} else {
9565 			wm_high.yclk = rdev->pm.current_mclk * 10;
9566 			wm_high.sclk = rdev->pm.current_sclk * 10;
9567 		}
9568 
9569 		wm_high.disp_clk = mode->clock;
9570 		wm_high.src_width = mode->crtc_hdisplay;
9571 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9572 		wm_high.blank_time = line_time - wm_high.active_time;
9573 		wm_high.interlaced = false;
9574 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9575 			wm_high.interlaced = true;
9576 		wm_high.vsc = radeon_crtc->vsc;
9577 		wm_high.vtaps = 1;
9578 		if (radeon_crtc->rmx_type != RMX_OFF)
9579 			wm_high.vtaps = 2;
9580 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9581 		wm_high.lb_size = lb_size;
9582 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9583 		wm_high.num_heads = num_heads;
9584 
9585 		/* set for high clocks */
9586 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9587 
9588 		/* possibly force display priority to high */
9589 		/* should really do this at mode validation time... */
9590 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9591 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9592 		    !dce8_check_latency_hiding(&wm_high) ||
9593 		    (rdev->disp_priority == 2)) {
9594 			DRM_DEBUG_KMS("force priority to high\n");
9595 		}
9596 
9597 		/* watermark for low clocks */
9598 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9599 		    rdev->pm.dpm_enabled) {
9600 			wm_low.yclk =
9601 				radeon_dpm_get_mclk(rdev, true) * 10;
9602 			wm_low.sclk =
9603 				radeon_dpm_get_sclk(rdev, true) * 10;
9604 		} else {
9605 			wm_low.yclk = rdev->pm.current_mclk * 10;
9606 			wm_low.sclk = rdev->pm.current_sclk * 10;
9607 		}
9608 
9609 		wm_low.disp_clk = mode->clock;
9610 		wm_low.src_width = mode->crtc_hdisplay;
9611 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9612 		wm_low.blank_time = line_time - wm_low.active_time;
9613 		wm_low.interlaced = false;
9614 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9615 			wm_low.interlaced = true;
9616 		wm_low.vsc = radeon_crtc->vsc;
9617 		wm_low.vtaps = 1;
9618 		if (radeon_crtc->rmx_type != RMX_OFF)
9619 			wm_low.vtaps = 2;
9620 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9621 		wm_low.lb_size = lb_size;
9622 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9623 		wm_low.num_heads = num_heads;
9624 
9625 		/* set for low clocks */
9626 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9627 
9628 		/* possibly force display priority to high */
9629 		/* should really do this at mode validation time... */
9630 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9631 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9632 		    !dce8_check_latency_hiding(&wm_low) ||
9633 		    (rdev->disp_priority == 2)) {
9634 			DRM_DEBUG_KMS("force priority to high\n");
9635 		}
9636 
9637 		/* Save number of lines the linebuffer leads before the scanout */
9638 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9639 	}
9640 
9641 	/* select wm A */
9642 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9643 	tmp = wm_mask;
9644 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9645 	tmp |= LATENCY_WATERMARK_MASK(1);
9646 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9647 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9648 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9649 		LATENCY_HIGH_WATERMARK(line_time)));
9650 	/* select wm B */
9651 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9652 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9653 	tmp |= LATENCY_WATERMARK_MASK(2);
9654 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9655 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9656 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9657 		LATENCY_HIGH_WATERMARK(line_time)));
9658 	/* restore original selection */
9659 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9660 
9661 	/* save values for DPM */
9662 	radeon_crtc->line_time = line_time;
9663 	radeon_crtc->wm_high = latency_watermark_a;
9664 	radeon_crtc->wm_low = latency_watermark_b;
9665 }
9666 
9667 /**
9668  * dce8_bandwidth_update - program display watermarks
9669  *
9670  * @rdev: radeon_device pointer
9671  *
9672  * Calculate and program the display watermarks and line
9673  * buffer allocation (CIK).
9674  */
9675 void dce8_bandwidth_update(struct radeon_device *rdev)
9676 {
9677 	struct drm_display_mode *mode = NULL;
9678 	u32 num_heads = 0, lb_size;
9679 	int i;
9680 
9681 	if (!rdev->mode_info.mode_config_initialized)
9682 		return;
9683 
9684 	radeon_update_display_priority(rdev);
9685 
9686 	for (i = 0; i < rdev->num_crtc; i++) {
9687 		if (rdev->mode_info.crtcs[i]->base.enabled)
9688 			num_heads++;
9689 	}
9690 	for (i = 0; i < rdev->num_crtc; i++) {
9691 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9692 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9693 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9694 	}
9695 }
9696 
9697 /**
9698  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9699  *
9700  * @rdev: radeon_device pointer
9701  *
9702  * Fetches a GPU clock counter snapshot (CIK).
9703  * Returns the 64 bit clock counter snapshot.
9704  */
9705 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9706 {
9707 	uint64_t clock;
9708 
9709 	mutex_lock(&rdev->gpu_clock_mutex);
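	/* writing 1 latches the free-running counter so the 32-bit LSB/MSB
	 * halves below read back as one consistent 64-bit snapshot
	 */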
9710 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9711 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9712 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9713 	mutex_unlock(&rdev->gpu_clock_mutex);
9714 	return clock;
9715 }
9716 
9717 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9718                               u32 cntl_reg, u32 status_reg)
9719 {
9720 	int r, i;
9721 	struct atom_clock_dividers dividers;
9722 	uint32_t tmp;
9723 
9724 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9725 					   clock, false, &dividers);
9726 	if (r)
9727 		return r;
9728 
9729 	tmp = RREG32_SMC(cntl_reg);
9730 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9731 	tmp |= dividers.post_divider;
9732 	WREG32_SMC(cntl_reg, tmp);
9733 
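	/* wait up to ~1 second (100 * 10 ms) for the new divider to latch */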
9734 	for (i = 0; i < 100; i++) {
9735 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9736 			break;
9737 		mdelay(10);
9738 	}
9739 	if (i == 100)
9740 		return -ETIMEDOUT;
9741 
9742 	return 0;
9743 }
9744 
9745 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9746 {
9747 	int r = 0;
9748 
9749 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9750 	if (r)
9751 		return r;
9752 
9753 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9754 	return r;
9755 }
9756 
9757 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9758 {
9759 	int r, i;
9760 	struct atom_clock_dividers dividers;
9761 	u32 tmp;
9762 
9763 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9764 					   ecclk, false, &dividers);
9765 	if (r)
9766 		return r;
9767 
9768 	for (i = 0; i < 100; i++) {
9769 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9770 			break;
9771 		mdelay(10);
9772 	}
9773 	if (i == 100)
9774 		return -ETIMEDOUT;
9775 
9776 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9777 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9778 	tmp |= dividers.post_divider;
9779 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9780 
9781 	for (i = 0; i < 100; i++) {
9782 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9783 			break;
9784 		mdelay(10);
9785 	}
9786 	if (i == 100)
9787 		return -ETIMEDOUT;
9788 
9789 	return 0;
9790 }
9791 
9792 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9793 {
9794 	struct pci_dev *root = rdev->pdev->bus->self;
9795 	int bridge_pos, gpu_pos;
9796 	u32 speed_cntl, mask, current_data_rate;
9797 	int ret, i;
9798 	u16 tmp16;
9799 
9800 	if (pci_is_root_bus(rdev->pdev->bus))
9801 		return;
9802 
9803 	if (radeon_pcie_gen2 == 0)
9804 		return;
9805 
9806 	if (rdev->flags & RADEON_IS_IGP)
9807 		return;
9808 
9809 	if (!(rdev->flags & RADEON_IS_PCIE))
9810 		return;
9811 
9812 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9813 	if (ret != 0)
9814 		return;
9815 
9816 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9817 		return;
9818 
9819 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9820 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9821 		LC_CURRENT_DATA_RATE_SHIFT;
9822 	if (mask & DRM_PCIE_SPEED_80) {
9823 		if (current_data_rate == 2) {
9824 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9825 			return;
9826 		}
9827 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9828 	} else if (mask & DRM_PCIE_SPEED_50) {
9829 		if (current_data_rate == 1) {
9830 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9831 			return;
9832 		}
9833 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9834 	}
9835 
9836 	bridge_pos = pci_pcie_cap(root);
9837 	if (!bridge_pos)
9838 		return;
9839 
9840 	gpu_pos = pci_pcie_cap(rdev->pdev);
9841 	if (!gpu_pos)
9842 		return;
9843 
9844 	if (mask & DRM_PCIE_SPEED_80) {
9845 		/* re-try equalization if gen3 is not already enabled */
9846 		if (current_data_rate != 2) {
9847 			u16 bridge_cfg, gpu_cfg;
9848 			u16 bridge_cfg2, gpu_cfg2;
9849 			u32 max_lw, current_lw, tmp;
9850 
9851 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9852 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9853 
9854 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9855 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9856 
9857 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9858 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9859 
9860 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9861 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9862 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9863 
9864 			if (current_lw < max_lw) {
9865 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9866 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9867 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9868 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9869 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9870 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9871 				}
9872 			}
9873 
9874 			for (i = 0; i < 10; i++) {
9875 				/* check status */
9876 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9877 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9878 					break;
9879 
9880 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9881 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9882 
9883 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9884 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9885 
9886 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9887 				tmp |= LC_SET_QUIESCE;
9888 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9889 
9890 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9891 				tmp |= LC_REDO_EQ;
9892 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9893 
9894 				mdelay(100);
9895 
9896 				/* linkctl */
9897 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9898 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9899 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9900 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9901 
9902 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9903 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9904 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9905 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9906 
9907 				/* linkctl2 */
9908 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9909 				tmp16 &= ~((1 << 4) | (7 << 9));
9910 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9911 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9912 
9913 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9914 				tmp16 &= ~((1 << 4) | (7 << 9));
9915 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9916 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9917 
9918 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9919 				tmp &= ~LC_SET_QUIESCE;
9920 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9921 			}
9922 		}
9923 	}
9924 
9925 	/* set the link speed */
9926 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9927 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9928 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9929 
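	/* PCI_EXP_LNKCTL2 target link speed field: 1 = 2.5GT/s (gen1),
	 * 2 = 5.0GT/s (gen2), 3 = 8.0GT/s (gen3)
	 */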
9930 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9931 	tmp16 &= ~0xf;
9932 	if (mask & DRM_PCIE_SPEED_80)
9933 		tmp16 |= 3; /* gen3 */
9934 	else if (mask & DRM_PCIE_SPEED_50)
9935 		tmp16 |= 2; /* gen2 */
9936 	else
9937 		tmp16 |= 1; /* gen1 */
9938 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9939 
9940 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9941 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9942 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9943 
9944 	for (i = 0; i < rdev->usec_timeout; i++) {
9945 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9946 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9947 			break;
9948 		udelay(1);
9949 	}
9950 }
9951 
9952 static void cik_program_aspm(struct radeon_device *rdev)
9953 {
9954 	u32 data, orig;
9955 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9956 	bool disable_clkreq = false;
9957 
9958 	if (radeon_aspm == 0)
9959 		return;
9960 
9961 	/* XXX double check IGPs */
9962 	if (rdev->flags & RADEON_IS_IGP)
9963 		return;
9964 
9965 	if (!(rdev->flags & RADEON_IS_PCIE))
9966 		return;
9967 
9968 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9969 	data &= ~LC_XMIT_N_FTS_MASK;
9970 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9971 	if (orig != data)
9972 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9973 
9974 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9975 	data |= LC_GO_TO_RECOVERY;
9976 	if (orig != data)
9977 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9978 
9979 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9980 	data |= P_IGNORE_EDB_ERR;
9981 	if (orig != data)
9982 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9983 
9984 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9985 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9986 	data |= LC_PMI_TO_L1_DIS;
9987 	if (!disable_l0s)
9988 		data |= LC_L0S_INACTIVITY(7);
9989 
9990 	if (!disable_l1) {
9991 		data |= LC_L1_INACTIVITY(7);
9992 		data &= ~LC_PMI_TO_L1_DIS;
9993 		if (orig != data)
9994 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9995 
9996 		if (!disable_plloff_in_l1) {
9997 			bool clk_req_support;
9998 
9999 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
10000 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
10001 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
10002 			if (orig != data)
10003 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
10004 
10005 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
10006 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
10007 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
10008 			if (orig != data)
10009 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
10010 
10011 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
10012 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
10013 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
10014 			if (orig != data)
10015 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
10016 
10017 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
10018 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
10019 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
10020 			if (orig != data)
10021 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
10022 
10023 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
10024 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
10025 			data |= LC_DYN_LANES_PWR_STATE(3);
10026 			if (orig != data)
10027 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
10028 
10029 			if (!disable_clkreq &&
10030 			    !pci_is_root_bus(rdev->pdev->bus)) {
10031 				struct pci_dev *root = rdev->pdev->bus->self;
10032 				u32 lnkcap;
10033 
10034 				clk_req_support = false;
10035 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
10036 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
10037 					clk_req_support = true;
10038 			} else {
10039 				clk_req_support = false;
10040 			}
10041 
10042 			if (clk_req_support) {
10043 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
10044 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
10045 				if (orig != data)
10046 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
10047 
10048 				orig = data = RREG32_SMC(THM_CLK_CNTL);
10049 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
10050 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
10051 				if (orig != data)
10052 					WREG32_SMC(THM_CLK_CNTL, data);
10053 
10054 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
10055 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
10056 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
10057 				if (orig != data)
10058 					WREG32_SMC(MISC_CLK_CTRL, data);
10059 
10060 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
10061 				data &= ~BCLK_AS_XCLK;
10062 				if (orig != data)
10063 					WREG32_SMC(CG_CLKPIN_CNTL, data);
10064 
10065 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
10066 				data &= ~FORCE_BIF_REFCLK_EN;
10067 				if (orig != data)
10068 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
10069 
10070 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
10071 				data &= ~MPLL_CLKOUT_SEL_MASK;
10072 				data |= MPLL_CLKOUT_SEL(4);
10073 				if (orig != data)
10074 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
10075 			}
10076 		}
10077 	} else {
10078 		if (orig != data)
10079 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10080 	}
10081 
10082 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
10083 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
10084 	if (orig != data)
10085 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
10086 
10087 	if (!disable_l0s) {
10088 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
10089 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
10090 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
10091 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
10092 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
10093 				data &= ~LC_L0S_INACTIVITY_MASK;
10094 				if (orig != data)
10095 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10096 			}
10097 		}
10098 	}
10099 }
10100