1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 
37 #define SH_MEM_CONFIG_GFX_DEFAULT \
38 	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
39 
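/*
 * Editorial note: each ASIC lists two firmware sets.  The uppercase
 * names are the legacy headerless binaries; the lowercase ones are
 * the newer packaged firmware (used when rdev->new_fw is set), and
 * the *_k_smc variants cover specific board revisions.
 */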
40 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
49 
50 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
51 MODULE_FIRMWARE("radeon/bonaire_me.bin");
52 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
53 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
54 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
55 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
56 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
57 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
58 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
59 
60 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
69 
70 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
71 MODULE_FIRMWARE("radeon/hawaii_me.bin");
72 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
73 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
74 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
75 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
76 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
77 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
78 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
79 
80 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
82 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
83 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
84 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
86 
87 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
88 MODULE_FIRMWARE("radeon/kaveri_me.bin");
89 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
90 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
91 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
92 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
93 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
94 
95 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
96 MODULE_FIRMWARE("radeon/KABINI_me.bin");
97 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
98 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
99 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
100 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
101 
102 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
103 MODULE_FIRMWARE("radeon/kabini_me.bin");
104 MODULE_FIRMWARE("radeon/kabini_ce.bin");
105 MODULE_FIRMWARE("radeon/kabini_mec.bin");
106 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
107 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
108 
109 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
111 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
112 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
113 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
115 
116 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
117 MODULE_FIRMWARE("radeon/mullins_me.bin");
118 MODULE_FIRMWARE("radeon/mullins_ce.bin");
119 MODULE_FIRMWARE("radeon/mullins_mec.bin");
120 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
121 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
122 
123 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
124 extern void r600_ih_ring_fini(struct radeon_device *rdev);
125 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
126 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
127 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
128 extern void sumo_rlc_fini(struct radeon_device *rdev);
129 extern int sumo_rlc_init(struct radeon_device *rdev);
130 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
131 extern void si_rlc_reset(struct radeon_device *rdev);
132 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
133 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
134 extern int cik_sdma_resume(struct radeon_device *rdev);
135 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
136 extern void cik_sdma_fini(struct radeon_device *rdev);
137 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
138 static void cik_rlc_stop(struct radeon_device *rdev);
139 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
140 static void cik_program_aspm(struct radeon_device *rdev);
141 static void cik_init_pg(struct radeon_device *rdev);
142 static void cik_init_cg(struct radeon_device *rdev);
143 static void cik_fini_pg(struct radeon_device *rdev);
144 static void cik_fini_cg(struct radeon_device *rdev);
145 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
146 					  bool enable);
147 
148 /**
149  * cik_get_allowed_info_register - fetch the register for the info ioctl
150  *
151  * @rdev: radeon_device pointer
152  * @reg: register offset in bytes
153  * @val: register value
154  *
155  * Returns 0 for success (and fills in @val) or -EINVAL for a
156  * register that is not on the allowed list.
157  */
158 int cik_get_allowed_info_register(struct radeon_device *rdev,
159 				  u32 reg, u32 *val)
160 {
161 	switch (reg) {
162 	case GRBM_STATUS:
163 	case GRBM_STATUS2:
164 	case GRBM_STATUS_SE0:
165 	case GRBM_STATUS_SE1:
166 	case GRBM_STATUS_SE2:
167 	case GRBM_STATUS_SE3:
168 	case SRBM_STATUS:
169 	case SRBM_STATUS2:
170 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
171 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
172 	case UVD_STATUS:
173 	/* TODO VCE */
174 		*val = RREG32(reg);
175 		return 0;
176 	default:
177 		return -EINVAL;
178 	}
179 }
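/*
 * Editorial sketch (not part of the original file): how a caller on
 * the info-ioctl path might use the whitelist helper above.  The
 * wrapper name is hypothetical; only registers matched by the switch
 * statement are readable, everything else yields -EINVAL.
 */
static int __maybe_unused cik_example_query_grbm(struct radeon_device *rdev)
{
	u32 val;
	int r;

	r = cik_get_allowed_info_register(rdev, GRBM_STATUS, &val);
	if (r)
		return r;	/* register not on the allowed list */
	DRM_DEBUG("GRBM_STATUS = 0x%08x\n", val);
	return 0;
}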
180 
181 /*
182  * DIDT indirect register accessors
183  */
184 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
185 {
186 	unsigned long flags;
187 	u32 r;
188 
189 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
190 	WREG32(CIK_DIDT_IND_INDEX, reg);
191 	r = RREG32(CIK_DIDT_IND_DATA);
192 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
193 	return r;
194 }
195 
196 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
197 {
198 	unsigned long flags;
199 
200 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
201 	WREG32(CIK_DIDT_IND_INDEX, reg);
202 	WREG32(CIK_DIDT_IND_DATA, v);
203 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
204 }
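/*
 * Editorial sketch: the DIDT accessors above are typically used in
 * read-modify-write sequences.  The didt_idx_lock makes each
 * index/data access atomic, but an RMW is still two separate indirect
 * transactions, as this hypothetical helper illustrates.
 */
static void __maybe_unused cik_example_didt_rmw(struct radeon_device *rdev,
						u32 reg, u32 clear, u32 set)
{
	u32 tmp = cik_didt_rreg(rdev, reg);

	tmp &= ~clear;
	tmp |= set;
	cik_didt_wreg(rdev, reg, tmp);
}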
205 
206 /* get temperature in millidegrees */
207 int ci_get_temp(struct radeon_device *rdev)
208 {
209 	u32 temp;
210 	int actual_temp = 0;
211 
212 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
213 		CTF_TEMP_SHIFT;
214 
215 	if (temp & 0x200)	/* reading out of range: clamp to max */
216 		actual_temp = 255;
217 	else
218 		actual_temp = temp & 0x1ff;
219 
220 	actual_temp = actual_temp * 1000;
221 
222 	return actual_temp;
223 }
224 
225 /* get temperature in millidegrees */
226 int kv_get_temp(struct radeon_device *rdev)
227 {
228 	u32 temp;
229 	int actual_temp = 0;
230 
231 	temp = RREG32_SMC(0xC0300E0C);	/* raw thermal sensor reading; no symbolic define exists */
232 
233 	if (temp)
234 		actual_temp = (temp / 8) - 49;
235 	else
236 		actual_temp = 0;
237 
238 	actual_temp = actual_temp * 1000;
239 
240 	return actual_temp;
241 }
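/*
 * Editorial sketch: both helpers above already return millidegrees
 * Celsius, the unit hwmon expects, so a hypothetical chip-agnostic
 * wrapper only has to pick the right sensor for the ASIC family.
 */
static int __maybe_unused cik_example_get_temp(struct radeon_device *rdev)
{
	/* KV/KB/ML are IGPs; discrete CI parts use the CTF status. */
	if (rdev->flags & RADEON_IS_IGP)
		return kv_get_temp(rdev);
	return ci_get_temp(rdev);
}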
242 
243 /*
244  * Indirect registers accessor
245  */
246 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
247 {
248 	unsigned long flags;
249 	u32 r;
250 
251 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
252 	WREG32(PCIE_INDEX, reg);
253 	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
254 	r = RREG32(PCIE_DATA);
255 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
256 	return r;
257 }
258 
259 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
260 {
261 	unsigned long flags;
262 
263 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
264 	WREG32(PCIE_INDEX, reg);
265 	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
266 	WREG32(PCIE_DATA, v);
267 	(void)RREG32(PCIE_DATA);	/* flush the data write */
268 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
269 }
270 
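/*
 * RLC save/restore lists.  These are copied verbatim into the RLC
 * save/restore buffer at RLC init (see sumo_rlc_init()) and consumed
 * by the RLC firmware.  Each entry appears to pair an instance-select
 * code in the upper 16 bits with a register dword offset (byte
 * offset >> 2) in the lower 16 bits, with 0x00000000 placeholder
 * words for the values captured at save time.  (Editorial note: the
 * encoding is inferred from how the entries are constructed below.)
 */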
271 static const u32 spectre_rlc_save_restore_register_list[] =
272 {
273 	(0x0e00 << 16) | (0xc12c >> 2),
274 	0x00000000,
275 	(0x0e00 << 16) | (0xc140 >> 2),
276 	0x00000000,
277 	(0x0e00 << 16) | (0xc150 >> 2),
278 	0x00000000,
279 	(0x0e00 << 16) | (0xc15c >> 2),
280 	0x00000000,
281 	(0x0e00 << 16) | (0xc168 >> 2),
282 	0x00000000,
283 	(0x0e00 << 16) | (0xc170 >> 2),
284 	0x00000000,
285 	(0x0e00 << 16) | (0xc178 >> 2),
286 	0x00000000,
287 	(0x0e00 << 16) | (0xc204 >> 2),
288 	0x00000000,
289 	(0x0e00 << 16) | (0xc2b4 >> 2),
290 	0x00000000,
291 	(0x0e00 << 16) | (0xc2b8 >> 2),
292 	0x00000000,
293 	(0x0e00 << 16) | (0xc2bc >> 2),
294 	0x00000000,
295 	(0x0e00 << 16) | (0xc2c0 >> 2),
296 	0x00000000,
297 	(0x0e00 << 16) | (0x8228 >> 2),
298 	0x00000000,
299 	(0x0e00 << 16) | (0x829c >> 2),
300 	0x00000000,
301 	(0x0e00 << 16) | (0x869c >> 2),
302 	0x00000000,
303 	(0x0600 << 16) | (0x98f4 >> 2),
304 	0x00000000,
305 	(0x0e00 << 16) | (0x98f8 >> 2),
306 	0x00000000,
307 	(0x0e00 << 16) | (0x9900 >> 2),
308 	0x00000000,
309 	(0x0e00 << 16) | (0xc260 >> 2),
310 	0x00000000,
311 	(0x0e00 << 16) | (0x90e8 >> 2),
312 	0x00000000,
313 	(0x0e00 << 16) | (0x3c000 >> 2),
314 	0x00000000,
315 	(0x0e00 << 16) | (0x3c00c >> 2),
316 	0x00000000,
317 	(0x0e00 << 16) | (0x8c1c >> 2),
318 	0x00000000,
319 	(0x0e00 << 16) | (0x9700 >> 2),
320 	0x00000000,
321 	(0x0e00 << 16) | (0xcd20 >> 2),
322 	0x00000000,
323 	(0x4e00 << 16) | (0xcd20 >> 2),
324 	0x00000000,
325 	(0x5e00 << 16) | (0xcd20 >> 2),
326 	0x00000000,
327 	(0x6e00 << 16) | (0xcd20 >> 2),
328 	0x00000000,
329 	(0x7e00 << 16) | (0xcd20 >> 2),
330 	0x00000000,
331 	(0x8e00 << 16) | (0xcd20 >> 2),
332 	0x00000000,
333 	(0x9e00 << 16) | (0xcd20 >> 2),
334 	0x00000000,
335 	(0xae00 << 16) | (0xcd20 >> 2),
336 	0x00000000,
337 	(0xbe00 << 16) | (0xcd20 >> 2),
338 	0x00000000,
339 	(0x0e00 << 16) | (0x89bc >> 2),
340 	0x00000000,
341 	(0x0e00 << 16) | (0x8900 >> 2),
342 	0x00000000,
343 	0x3,
344 	(0x0e00 << 16) | (0xc130 >> 2),
345 	0x00000000,
346 	(0x0e00 << 16) | (0xc134 >> 2),
347 	0x00000000,
348 	(0x0e00 << 16) | (0xc1fc >> 2),
349 	0x00000000,
350 	(0x0e00 << 16) | (0xc208 >> 2),
351 	0x00000000,
352 	(0x0e00 << 16) | (0xc264 >> 2),
353 	0x00000000,
354 	(0x0e00 << 16) | (0xc268 >> 2),
355 	0x00000000,
356 	(0x0e00 << 16) | (0xc26c >> 2),
357 	0x00000000,
358 	(0x0e00 << 16) | (0xc270 >> 2),
359 	0x00000000,
360 	(0x0e00 << 16) | (0xc274 >> 2),
361 	0x00000000,
362 	(0x0e00 << 16) | (0xc278 >> 2),
363 	0x00000000,
364 	(0x0e00 << 16) | (0xc27c >> 2),
365 	0x00000000,
366 	(0x0e00 << 16) | (0xc280 >> 2),
367 	0x00000000,
368 	(0x0e00 << 16) | (0xc284 >> 2),
369 	0x00000000,
370 	(0x0e00 << 16) | (0xc288 >> 2),
371 	0x00000000,
372 	(0x0e00 << 16) | (0xc28c >> 2),
373 	0x00000000,
374 	(0x0e00 << 16) | (0xc290 >> 2),
375 	0x00000000,
376 	(0x0e00 << 16) | (0xc294 >> 2),
377 	0x00000000,
378 	(0x0e00 << 16) | (0xc298 >> 2),
379 	0x00000000,
380 	(0x0e00 << 16) | (0xc29c >> 2),
381 	0x00000000,
382 	(0x0e00 << 16) | (0xc2a0 >> 2),
383 	0x00000000,
384 	(0x0e00 << 16) | (0xc2a4 >> 2),
385 	0x00000000,
386 	(0x0e00 << 16) | (0xc2a8 >> 2),
387 	0x00000000,
388 	(0x0e00 << 16) | (0xc2ac >> 2),
389 	0x00000000,
390 	(0x0e00 << 16) | (0xc2b0 >> 2),
391 	0x00000000,
392 	(0x0e00 << 16) | (0x301d0 >> 2),
393 	0x00000000,
394 	(0x0e00 << 16) | (0x30238 >> 2),
395 	0x00000000,
396 	(0x0e00 << 16) | (0x30250 >> 2),
397 	0x00000000,
398 	(0x0e00 << 16) | (0x30254 >> 2),
399 	0x00000000,
400 	(0x0e00 << 16) | (0x30258 >> 2),
401 	0x00000000,
402 	(0x0e00 << 16) | (0x3025c >> 2),
403 	0x00000000,
404 	(0x4e00 << 16) | (0xc900 >> 2),
405 	0x00000000,
406 	(0x5e00 << 16) | (0xc900 >> 2),
407 	0x00000000,
408 	(0x6e00 << 16) | (0xc900 >> 2),
409 	0x00000000,
410 	(0x7e00 << 16) | (0xc900 >> 2),
411 	0x00000000,
412 	(0x8e00 << 16) | (0xc900 >> 2),
413 	0x00000000,
414 	(0x9e00 << 16) | (0xc900 >> 2),
415 	0x00000000,
416 	(0xae00 << 16) | (0xc900 >> 2),
417 	0x00000000,
418 	(0xbe00 << 16) | (0xc900 >> 2),
419 	0x00000000,
420 	(0x4e00 << 16) | (0xc904 >> 2),
421 	0x00000000,
422 	(0x5e00 << 16) | (0xc904 >> 2),
423 	0x00000000,
424 	(0x6e00 << 16) | (0xc904 >> 2),
425 	0x00000000,
426 	(0x7e00 << 16) | (0xc904 >> 2),
427 	0x00000000,
428 	(0x8e00 << 16) | (0xc904 >> 2),
429 	0x00000000,
430 	(0x9e00 << 16) | (0xc904 >> 2),
431 	0x00000000,
432 	(0xae00 << 16) | (0xc904 >> 2),
433 	0x00000000,
434 	(0xbe00 << 16) | (0xc904 >> 2),
435 	0x00000000,
436 	(0x4e00 << 16) | (0xc908 >> 2),
437 	0x00000000,
438 	(0x5e00 << 16) | (0xc908 >> 2),
439 	0x00000000,
440 	(0x6e00 << 16) | (0xc908 >> 2),
441 	0x00000000,
442 	(0x7e00 << 16) | (0xc908 >> 2),
443 	0x00000000,
444 	(0x8e00 << 16) | (0xc908 >> 2),
445 	0x00000000,
446 	(0x9e00 << 16) | (0xc908 >> 2),
447 	0x00000000,
448 	(0xae00 << 16) | (0xc908 >> 2),
449 	0x00000000,
450 	(0xbe00 << 16) | (0xc908 >> 2),
451 	0x00000000,
452 	(0x4e00 << 16) | (0xc90c >> 2),
453 	0x00000000,
454 	(0x5e00 << 16) | (0xc90c >> 2),
455 	0x00000000,
456 	(0x6e00 << 16) | (0xc90c >> 2),
457 	0x00000000,
458 	(0x7e00 << 16) | (0xc90c >> 2),
459 	0x00000000,
460 	(0x8e00 << 16) | (0xc90c >> 2),
461 	0x00000000,
462 	(0x9e00 << 16) | (0xc90c >> 2),
463 	0x00000000,
464 	(0xae00 << 16) | (0xc90c >> 2),
465 	0x00000000,
466 	(0xbe00 << 16) | (0xc90c >> 2),
467 	0x00000000,
468 	(0x4e00 << 16) | (0xc910 >> 2),
469 	0x00000000,
470 	(0x5e00 << 16) | (0xc910 >> 2),
471 	0x00000000,
472 	(0x6e00 << 16) | (0xc910 >> 2),
473 	0x00000000,
474 	(0x7e00 << 16) | (0xc910 >> 2),
475 	0x00000000,
476 	(0x8e00 << 16) | (0xc910 >> 2),
477 	0x00000000,
478 	(0x9e00 << 16) | (0xc910 >> 2),
479 	0x00000000,
480 	(0xae00 << 16) | (0xc910 >> 2),
481 	0x00000000,
482 	(0xbe00 << 16) | (0xc910 >> 2),
483 	0x00000000,
484 	(0x0e00 << 16) | (0xc99c >> 2),
485 	0x00000000,
486 	(0x0e00 << 16) | (0x9834 >> 2),
487 	0x00000000,
488 	(0x0000 << 16) | (0x30f00 >> 2),
489 	0x00000000,
490 	(0x0001 << 16) | (0x30f00 >> 2),
491 	0x00000000,
492 	(0x0000 << 16) | (0x30f04 >> 2),
493 	0x00000000,
494 	(0x0001 << 16) | (0x30f04 >> 2),
495 	0x00000000,
496 	(0x0000 << 16) | (0x30f08 >> 2),
497 	0x00000000,
498 	(0x0001 << 16) | (0x30f08 >> 2),
499 	0x00000000,
500 	(0x0000 << 16) | (0x30f0c >> 2),
501 	0x00000000,
502 	(0x0001 << 16) | (0x30f0c >> 2),
503 	0x00000000,
504 	(0x0600 << 16) | (0x9b7c >> 2),
505 	0x00000000,
506 	(0x0e00 << 16) | (0x8a14 >> 2),
507 	0x00000000,
508 	(0x0e00 << 16) | (0x8a18 >> 2),
509 	0x00000000,
510 	(0x0600 << 16) | (0x30a00 >> 2),
511 	0x00000000,
512 	(0x0e00 << 16) | (0x8bf0 >> 2),
513 	0x00000000,
514 	(0x0e00 << 16) | (0x8bcc >> 2),
515 	0x00000000,
516 	(0x0e00 << 16) | (0x8b24 >> 2),
517 	0x00000000,
518 	(0x0e00 << 16) | (0x30a04 >> 2),
519 	0x00000000,
520 	(0x0600 << 16) | (0x30a10 >> 2),
521 	0x00000000,
522 	(0x0600 << 16) | (0x30a14 >> 2),
523 	0x00000000,
524 	(0x0600 << 16) | (0x30a18 >> 2),
525 	0x00000000,
526 	(0x0600 << 16) | (0x30a2c >> 2),
527 	0x00000000,
528 	(0x0e00 << 16) | (0xc700 >> 2),
529 	0x00000000,
530 	(0x0e00 << 16) | (0xc704 >> 2),
531 	0x00000000,
532 	(0x0e00 << 16) | (0xc708 >> 2),
533 	0x00000000,
534 	(0x0e00 << 16) | (0xc768 >> 2),
535 	0x00000000,
536 	(0x0400 << 16) | (0xc770 >> 2),
537 	0x00000000,
538 	(0x0400 << 16) | (0xc774 >> 2),
539 	0x00000000,
540 	(0x0400 << 16) | (0xc778 >> 2),
541 	0x00000000,
542 	(0x0400 << 16) | (0xc77c >> 2),
543 	0x00000000,
544 	(0x0400 << 16) | (0xc780 >> 2),
545 	0x00000000,
546 	(0x0400 << 16) | (0xc784 >> 2),
547 	0x00000000,
548 	(0x0400 << 16) | (0xc788 >> 2),
549 	0x00000000,
550 	(0x0400 << 16) | (0xc78c >> 2),
551 	0x00000000,
552 	(0x0400 << 16) | (0xc798 >> 2),
553 	0x00000000,
554 	(0x0400 << 16) | (0xc79c >> 2),
555 	0x00000000,
556 	(0x0400 << 16) | (0xc7a0 >> 2),
557 	0x00000000,
558 	(0x0400 << 16) | (0xc7a4 >> 2),
559 	0x00000000,
560 	(0x0400 << 16) | (0xc7a8 >> 2),
561 	0x00000000,
562 	(0x0400 << 16) | (0xc7ac >> 2),
563 	0x00000000,
564 	(0x0400 << 16) | (0xc7b0 >> 2),
565 	0x00000000,
566 	(0x0400 << 16) | (0xc7b4 >> 2),
567 	0x00000000,
568 	(0x0e00 << 16) | (0x9100 >> 2),
569 	0x00000000,
570 	(0x0e00 << 16) | (0x3c010 >> 2),
571 	0x00000000,
572 	(0x0e00 << 16) | (0x92a8 >> 2),
573 	0x00000000,
574 	(0x0e00 << 16) | (0x92ac >> 2),
575 	0x00000000,
576 	(0x0e00 << 16) | (0x92b4 >> 2),
577 	0x00000000,
578 	(0x0e00 << 16) | (0x92b8 >> 2),
579 	0x00000000,
580 	(0x0e00 << 16) | (0x92bc >> 2),
581 	0x00000000,
582 	(0x0e00 << 16) | (0x92c0 >> 2),
583 	0x00000000,
584 	(0x0e00 << 16) | (0x92c4 >> 2),
585 	0x00000000,
586 	(0x0e00 << 16) | (0x92c8 >> 2),
587 	0x00000000,
588 	(0x0e00 << 16) | (0x92cc >> 2),
589 	0x00000000,
590 	(0x0e00 << 16) | (0x92d0 >> 2),
591 	0x00000000,
592 	(0x0e00 << 16) | (0x8c00 >> 2),
593 	0x00000000,
594 	(0x0e00 << 16) | (0x8c04 >> 2),
595 	0x00000000,
596 	(0x0e00 << 16) | (0x8c20 >> 2),
597 	0x00000000,
598 	(0x0e00 << 16) | (0x8c38 >> 2),
599 	0x00000000,
600 	(0x0e00 << 16) | (0x8c3c >> 2),
601 	0x00000000,
602 	(0x0e00 << 16) | (0xae00 >> 2),
603 	0x00000000,
604 	(0x0e00 << 16) | (0x9604 >> 2),
605 	0x00000000,
606 	(0x0e00 << 16) | (0xac08 >> 2),
607 	0x00000000,
608 	(0x0e00 << 16) | (0xac0c >> 2),
609 	0x00000000,
610 	(0x0e00 << 16) | (0xac10 >> 2),
611 	0x00000000,
612 	(0x0e00 << 16) | (0xac14 >> 2),
613 	0x00000000,
614 	(0x0e00 << 16) | (0xac58 >> 2),
615 	0x00000000,
616 	(0x0e00 << 16) | (0xac68 >> 2),
617 	0x00000000,
618 	(0x0e00 << 16) | (0xac6c >> 2),
619 	0x00000000,
620 	(0x0e00 << 16) | (0xac70 >> 2),
621 	0x00000000,
622 	(0x0e00 << 16) | (0xac74 >> 2),
623 	0x00000000,
624 	(0x0e00 << 16) | (0xac78 >> 2),
625 	0x00000000,
626 	(0x0e00 << 16) | (0xac7c >> 2),
627 	0x00000000,
628 	(0x0e00 << 16) | (0xac80 >> 2),
629 	0x00000000,
630 	(0x0e00 << 16) | (0xac84 >> 2),
631 	0x00000000,
632 	(0x0e00 << 16) | (0xac88 >> 2),
633 	0x00000000,
634 	(0x0e00 << 16) | (0xac8c >> 2),
635 	0x00000000,
636 	(0x0e00 << 16) | (0x970c >> 2),
637 	0x00000000,
638 	(0x0e00 << 16) | (0x9714 >> 2),
639 	0x00000000,
640 	(0x0e00 << 16) | (0x9718 >> 2),
641 	0x00000000,
642 	(0x0e00 << 16) | (0x971c >> 2),
643 	0x00000000,
644 	(0x0e00 << 16) | (0x31068 >> 2),
645 	0x00000000,
646 	(0x4e00 << 16) | (0x31068 >> 2),
647 	0x00000000,
648 	(0x5e00 << 16) | (0x31068 >> 2),
649 	0x00000000,
650 	(0x6e00 << 16) | (0x31068 >> 2),
651 	0x00000000,
652 	(0x7e00 << 16) | (0x31068 >> 2),
653 	0x00000000,
654 	(0x8e00 << 16) | (0x31068 >> 2),
655 	0x00000000,
656 	(0x9e00 << 16) | (0x31068 >> 2),
657 	0x00000000,
658 	(0xae00 << 16) | (0x31068 >> 2),
659 	0x00000000,
660 	(0xbe00 << 16) | (0x31068 >> 2),
661 	0x00000000,
662 	(0x0e00 << 16) | (0xcd10 >> 2),
663 	0x00000000,
664 	(0x0e00 << 16) | (0xcd14 >> 2),
665 	0x00000000,
666 	(0x0e00 << 16) | (0x88b0 >> 2),
667 	0x00000000,
668 	(0x0e00 << 16) | (0x88b4 >> 2),
669 	0x00000000,
670 	(0x0e00 << 16) | (0x88b8 >> 2),
671 	0x00000000,
672 	(0x0e00 << 16) | (0x88bc >> 2),
673 	0x00000000,
674 	(0x0400 << 16) | (0x89c0 >> 2),
675 	0x00000000,
676 	(0x0e00 << 16) | (0x88c4 >> 2),
677 	0x00000000,
678 	(0x0e00 << 16) | (0x88c8 >> 2),
679 	0x00000000,
680 	(0x0e00 << 16) | (0x88d0 >> 2),
681 	0x00000000,
682 	(0x0e00 << 16) | (0x88d4 >> 2),
683 	0x00000000,
684 	(0x0e00 << 16) | (0x88d8 >> 2),
685 	0x00000000,
686 	(0x0e00 << 16) | (0x8980 >> 2),
687 	0x00000000,
688 	(0x0e00 << 16) | (0x30938 >> 2),
689 	0x00000000,
690 	(0x0e00 << 16) | (0x3093c >> 2),
691 	0x00000000,
692 	(0x0e00 << 16) | (0x30940 >> 2),
693 	0x00000000,
694 	(0x0e00 << 16) | (0x89a0 >> 2),
695 	0x00000000,
696 	(0x0e00 << 16) | (0x30900 >> 2),
697 	0x00000000,
698 	(0x0e00 << 16) | (0x30904 >> 2),
699 	0x00000000,
700 	(0x0e00 << 16) | (0x89b4 >> 2),
701 	0x00000000,
702 	(0x0e00 << 16) | (0x3c210 >> 2),
703 	0x00000000,
704 	(0x0e00 << 16) | (0x3c214 >> 2),
705 	0x00000000,
706 	(0x0e00 << 16) | (0x3c218 >> 2),
707 	0x00000000,
708 	(0x0e00 << 16) | (0x8904 >> 2),
709 	0x00000000,
710 	0x5,
711 	(0x0e00 << 16) | (0x8c28 >> 2),
712 	(0x0e00 << 16) | (0x8c2c >> 2),
713 	(0x0e00 << 16) | (0x8c30 >> 2),
714 	(0x0e00 << 16) | (0x8c34 >> 2),
715 	(0x0e00 << 16) | (0x9600 >> 2),
716 };
717 
718 static const u32 kalindi_rlc_save_restore_register_list[] =
719 {
720 	(0x0e00 << 16) | (0xc12c >> 2),
721 	0x00000000,
722 	(0x0e00 << 16) | (0xc140 >> 2),
723 	0x00000000,
724 	(0x0e00 << 16) | (0xc150 >> 2),
725 	0x00000000,
726 	(0x0e00 << 16) | (0xc15c >> 2),
727 	0x00000000,
728 	(0x0e00 << 16) | (0xc168 >> 2),
729 	0x00000000,
730 	(0x0e00 << 16) | (0xc170 >> 2),
731 	0x00000000,
732 	(0x0e00 << 16) | (0xc204 >> 2),
733 	0x00000000,
734 	(0x0e00 << 16) | (0xc2b4 >> 2),
735 	0x00000000,
736 	(0x0e00 << 16) | (0xc2b8 >> 2),
737 	0x00000000,
738 	(0x0e00 << 16) | (0xc2bc >> 2),
739 	0x00000000,
740 	(0x0e00 << 16) | (0xc2c0 >> 2),
741 	0x00000000,
742 	(0x0e00 << 16) | (0x8228 >> 2),
743 	0x00000000,
744 	(0x0e00 << 16) | (0x829c >> 2),
745 	0x00000000,
746 	(0x0e00 << 16) | (0x869c >> 2),
747 	0x00000000,
748 	(0x0600 << 16) | (0x98f4 >> 2),
749 	0x00000000,
750 	(0x0e00 << 16) | (0x98f8 >> 2),
751 	0x00000000,
752 	(0x0e00 << 16) | (0x9900 >> 2),
753 	0x00000000,
754 	(0x0e00 << 16) | (0xc260 >> 2),
755 	0x00000000,
756 	(0x0e00 << 16) | (0x90e8 >> 2),
757 	0x00000000,
758 	(0x0e00 << 16) | (0x3c000 >> 2),
759 	0x00000000,
760 	(0x0e00 << 16) | (0x3c00c >> 2),
761 	0x00000000,
762 	(0x0e00 << 16) | (0x8c1c >> 2),
763 	0x00000000,
764 	(0x0e00 << 16) | (0x9700 >> 2),
765 	0x00000000,
766 	(0x0e00 << 16) | (0xcd20 >> 2),
767 	0x00000000,
768 	(0x4e00 << 16) | (0xcd20 >> 2),
769 	0x00000000,
770 	(0x5e00 << 16) | (0xcd20 >> 2),
771 	0x00000000,
772 	(0x6e00 << 16) | (0xcd20 >> 2),
773 	0x00000000,
774 	(0x7e00 << 16) | (0xcd20 >> 2),
775 	0x00000000,
776 	(0x0e00 << 16) | (0x89bc >> 2),
777 	0x00000000,
778 	(0x0e00 << 16) | (0x8900 >> 2),
779 	0x00000000,
780 	0x3,
781 	(0x0e00 << 16) | (0xc130 >> 2),
782 	0x00000000,
783 	(0x0e00 << 16) | (0xc134 >> 2),
784 	0x00000000,
785 	(0x0e00 << 16) | (0xc1fc >> 2),
786 	0x00000000,
787 	(0x0e00 << 16) | (0xc208 >> 2),
788 	0x00000000,
789 	(0x0e00 << 16) | (0xc264 >> 2),
790 	0x00000000,
791 	(0x0e00 << 16) | (0xc268 >> 2),
792 	0x00000000,
793 	(0x0e00 << 16) | (0xc26c >> 2),
794 	0x00000000,
795 	(0x0e00 << 16) | (0xc270 >> 2),
796 	0x00000000,
797 	(0x0e00 << 16) | (0xc274 >> 2),
798 	0x00000000,
799 	(0x0e00 << 16) | (0xc28c >> 2),
800 	0x00000000,
801 	(0x0e00 << 16) | (0xc290 >> 2),
802 	0x00000000,
803 	(0x0e00 << 16) | (0xc294 >> 2),
804 	0x00000000,
805 	(0x0e00 << 16) | (0xc298 >> 2),
806 	0x00000000,
807 	(0x0e00 << 16) | (0xc2a0 >> 2),
808 	0x00000000,
809 	(0x0e00 << 16) | (0xc2a4 >> 2),
810 	0x00000000,
811 	(0x0e00 << 16) | (0xc2a8 >> 2),
812 	0x00000000,
813 	(0x0e00 << 16) | (0xc2ac >> 2),
814 	0x00000000,
815 	(0x0e00 << 16) | (0x301d0 >> 2),
816 	0x00000000,
817 	(0x0e00 << 16) | (0x30238 >> 2),
818 	0x00000000,
819 	(0x0e00 << 16) | (0x30250 >> 2),
820 	0x00000000,
821 	(0x0e00 << 16) | (0x30254 >> 2),
822 	0x00000000,
823 	(0x0e00 << 16) | (0x30258 >> 2),
824 	0x00000000,
825 	(0x0e00 << 16) | (0x3025c >> 2),
826 	0x00000000,
827 	(0x4e00 << 16) | (0xc900 >> 2),
828 	0x00000000,
829 	(0x5e00 << 16) | (0xc900 >> 2),
830 	0x00000000,
831 	(0x6e00 << 16) | (0xc900 >> 2),
832 	0x00000000,
833 	(0x7e00 << 16) | (0xc900 >> 2),
834 	0x00000000,
835 	(0x4e00 << 16) | (0xc904 >> 2),
836 	0x00000000,
837 	(0x5e00 << 16) | (0xc904 >> 2),
838 	0x00000000,
839 	(0x6e00 << 16) | (0xc904 >> 2),
840 	0x00000000,
841 	(0x7e00 << 16) | (0xc904 >> 2),
842 	0x00000000,
843 	(0x4e00 << 16) | (0xc908 >> 2),
844 	0x00000000,
845 	(0x5e00 << 16) | (0xc908 >> 2),
846 	0x00000000,
847 	(0x6e00 << 16) | (0xc908 >> 2),
848 	0x00000000,
849 	(0x7e00 << 16) | (0xc908 >> 2),
850 	0x00000000,
851 	(0x4e00 << 16) | (0xc90c >> 2),
852 	0x00000000,
853 	(0x5e00 << 16) | (0xc90c >> 2),
854 	0x00000000,
855 	(0x6e00 << 16) | (0xc90c >> 2),
856 	0x00000000,
857 	(0x7e00 << 16) | (0xc90c >> 2),
858 	0x00000000,
859 	(0x4e00 << 16) | (0xc910 >> 2),
860 	0x00000000,
861 	(0x5e00 << 16) | (0xc910 >> 2),
862 	0x00000000,
863 	(0x6e00 << 16) | (0xc910 >> 2),
864 	0x00000000,
865 	(0x7e00 << 16) | (0xc910 >> 2),
866 	0x00000000,
867 	(0x0e00 << 16) | (0xc99c >> 2),
868 	0x00000000,
869 	(0x0e00 << 16) | (0x9834 >> 2),
870 	0x00000000,
871 	(0x0000 << 16) | (0x30f00 >> 2),
872 	0x00000000,
873 	(0x0000 << 16) | (0x30f04 >> 2),
874 	0x00000000,
875 	(0x0000 << 16) | (0x30f08 >> 2),
876 	0x00000000,
877 	(0x0000 << 16) | (0x30f0c >> 2),
878 	0x00000000,
879 	(0x0600 << 16) | (0x9b7c >> 2),
880 	0x00000000,
881 	(0x0e00 << 16) | (0x8a14 >> 2),
882 	0x00000000,
883 	(0x0e00 << 16) | (0x8a18 >> 2),
884 	0x00000000,
885 	(0x0600 << 16) | (0x30a00 >> 2),
886 	0x00000000,
887 	(0x0e00 << 16) | (0x8bf0 >> 2),
888 	0x00000000,
889 	(0x0e00 << 16) | (0x8bcc >> 2),
890 	0x00000000,
891 	(0x0e00 << 16) | (0x8b24 >> 2),
892 	0x00000000,
893 	(0x0e00 << 16) | (0x30a04 >> 2),
894 	0x00000000,
895 	(0x0600 << 16) | (0x30a10 >> 2),
896 	0x00000000,
897 	(0x0600 << 16) | (0x30a14 >> 2),
898 	0x00000000,
899 	(0x0600 << 16) | (0x30a18 >> 2),
900 	0x00000000,
901 	(0x0600 << 16) | (0x30a2c >> 2),
902 	0x00000000,
903 	(0x0e00 << 16) | (0xc700 >> 2),
904 	0x00000000,
905 	(0x0e00 << 16) | (0xc704 >> 2),
906 	0x00000000,
907 	(0x0e00 << 16) | (0xc708 >> 2),
908 	0x00000000,
909 	(0x0e00 << 16) | (0xc768 >> 2),
910 	0x00000000,
911 	(0x0400 << 16) | (0xc770 >> 2),
912 	0x00000000,
913 	(0x0400 << 16) | (0xc774 >> 2),
914 	0x00000000,
915 	(0x0400 << 16) | (0xc798 >> 2),
916 	0x00000000,
917 	(0x0400 << 16) | (0xc79c >> 2),
918 	0x00000000,
919 	(0x0e00 << 16) | (0x9100 >> 2),
920 	0x00000000,
921 	(0x0e00 << 16) | (0x3c010 >> 2),
922 	0x00000000,
923 	(0x0e00 << 16) | (0x8c00 >> 2),
924 	0x00000000,
925 	(0x0e00 << 16) | (0x8c04 >> 2),
926 	0x00000000,
927 	(0x0e00 << 16) | (0x8c20 >> 2),
928 	0x00000000,
929 	(0x0e00 << 16) | (0x8c38 >> 2),
930 	0x00000000,
931 	(0x0e00 << 16) | (0x8c3c >> 2),
932 	0x00000000,
933 	(0x0e00 << 16) | (0xae00 >> 2),
934 	0x00000000,
935 	(0x0e00 << 16) | (0x9604 >> 2),
936 	0x00000000,
937 	(0x0e00 << 16) | (0xac08 >> 2),
938 	0x00000000,
939 	(0x0e00 << 16) | (0xac0c >> 2),
940 	0x00000000,
941 	(0x0e00 << 16) | (0xac10 >> 2),
942 	0x00000000,
943 	(0x0e00 << 16) | (0xac14 >> 2),
944 	0x00000000,
945 	(0x0e00 << 16) | (0xac58 >> 2),
946 	0x00000000,
947 	(0x0e00 << 16) | (0xac68 >> 2),
948 	0x00000000,
949 	(0x0e00 << 16) | (0xac6c >> 2),
950 	0x00000000,
951 	(0x0e00 << 16) | (0xac70 >> 2),
952 	0x00000000,
953 	(0x0e00 << 16) | (0xac74 >> 2),
954 	0x00000000,
955 	(0x0e00 << 16) | (0xac78 >> 2),
956 	0x00000000,
957 	(0x0e00 << 16) | (0xac7c >> 2),
958 	0x00000000,
959 	(0x0e00 << 16) | (0xac80 >> 2),
960 	0x00000000,
961 	(0x0e00 << 16) | (0xac84 >> 2),
962 	0x00000000,
963 	(0x0e00 << 16) | (0xac88 >> 2),
964 	0x00000000,
965 	(0x0e00 << 16) | (0xac8c >> 2),
966 	0x00000000,
967 	(0x0e00 << 16) | (0x970c >> 2),
968 	0x00000000,
969 	(0x0e00 << 16) | (0x9714 >> 2),
970 	0x00000000,
971 	(0x0e00 << 16) | (0x9718 >> 2),
972 	0x00000000,
973 	(0x0e00 << 16) | (0x971c >> 2),
974 	0x00000000,
975 	(0x0e00 << 16) | (0x31068 >> 2),
976 	0x00000000,
977 	(0x4e00 << 16) | (0x31068 >> 2),
978 	0x00000000,
979 	(0x5e00 << 16) | (0x31068 >> 2),
980 	0x00000000,
981 	(0x6e00 << 16) | (0x31068 >> 2),
982 	0x00000000,
983 	(0x7e00 << 16) | (0x31068 >> 2),
984 	0x00000000,
985 	(0x0e00 << 16) | (0xcd10 >> 2),
986 	0x00000000,
987 	(0x0e00 << 16) | (0xcd14 >> 2),
988 	0x00000000,
989 	(0x0e00 << 16) | (0x88b0 >> 2),
990 	0x00000000,
991 	(0x0e00 << 16) | (0x88b4 >> 2),
992 	0x00000000,
993 	(0x0e00 << 16) | (0x88b8 >> 2),
994 	0x00000000,
995 	(0x0e00 << 16) | (0x88bc >> 2),
996 	0x00000000,
997 	(0x0400 << 16) | (0x89c0 >> 2),
998 	0x00000000,
999 	(0x0e00 << 16) | (0x88c4 >> 2),
1000 	0x00000000,
1001 	(0x0e00 << 16) | (0x88c8 >> 2),
1002 	0x00000000,
1003 	(0x0e00 << 16) | (0x88d0 >> 2),
1004 	0x00000000,
1005 	(0x0e00 << 16) | (0x88d4 >> 2),
1006 	0x00000000,
1007 	(0x0e00 << 16) | (0x88d8 >> 2),
1008 	0x00000000,
1009 	(0x0e00 << 16) | (0x8980 >> 2),
1010 	0x00000000,
1011 	(0x0e00 << 16) | (0x30938 >> 2),
1012 	0x00000000,
1013 	(0x0e00 << 16) | (0x3093c >> 2),
1014 	0x00000000,
1015 	(0x0e00 << 16) | (0x30940 >> 2),
1016 	0x00000000,
1017 	(0x0e00 << 16) | (0x89a0 >> 2),
1018 	0x00000000,
1019 	(0x0e00 << 16) | (0x30900 >> 2),
1020 	0x00000000,
1021 	(0x0e00 << 16) | (0x30904 >> 2),
1022 	0x00000000,
1023 	(0x0e00 << 16) | (0x89b4 >> 2),
1024 	0x00000000,
1025 	(0x0e00 << 16) | (0x3e1fc >> 2),
1026 	0x00000000,
1027 	(0x0e00 << 16) | (0x3c210 >> 2),
1028 	0x00000000,
1029 	(0x0e00 << 16) | (0x3c214 >> 2),
1030 	0x00000000,
1031 	(0x0e00 << 16) | (0x3c218 >> 2),
1032 	0x00000000,
1033 	(0x0e00 << 16) | (0x8904 >> 2),
1034 	0x00000000,
1035 	0x5,
1036 	(0x0e00 << 16) | (0x8c28 >> 2),
1037 	(0x0e00 << 16) | (0x8c2c >> 2),
1038 	(0x0e00 << 16) | (0x8c30 >> 2),
1039 	(0x0e00 << 16) | (0x8c34 >> 2),
1040 	(0x0e00 << 16) | (0x9600 >> 2),
1041 };
1042 
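/*
 * "Golden" register tables.  Each row is an {offset, and_mask,
 * or_mask} triple consumed by radeon_program_register_sequence():
 * the bits in and_mask are cleared and or_mask is OR'd in, except
 * that an and_mask of 0xffffffff means or_mask is written verbatim.
 */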
1043 static const u32 bonaire_golden_spm_registers[] =
1044 {
1045 	0x30800, 0xe0ffffff, 0xe0000000
1046 };
1047 
1048 static const u32 bonaire_golden_common_registers[] =
1049 {
1050 	0xc770, 0xffffffff, 0x00000800,
1051 	0xc774, 0xffffffff, 0x00000800,
1052 	0xc798, 0xffffffff, 0x00007fbf,
1053 	0xc79c, 0xffffffff, 0x00007faf
1054 };
1055 
1056 static const u32 bonaire_golden_registers[] =
1057 {
1058 	0x3354, 0x00000333, 0x00000333,
1059 	0x3350, 0x000c0fc0, 0x00040200,
1060 	0x9a10, 0x00010000, 0x00058208,
1061 	0x3c000, 0xffff1fff, 0x00140000,
1062 	0x3c200, 0xfdfc0fff, 0x00000100,
1063 	0x3c234, 0x40000000, 0x40000200,
1064 	0x9830, 0xffffffff, 0x00000000,
1065 	0x9834, 0xf00fffff, 0x00000400,
1066 	0x9838, 0x0002021c, 0x00020200,
1067 	0xc78, 0x00000080, 0x00000000,
1068 	0x5bb0, 0x000000f0, 0x00000070,
1069 	0x5bc0, 0xf0311fff, 0x80300000,
1070 	0x98f8, 0x73773777, 0x12010001,
1071 	0x350c, 0x00810000, 0x408af000,
1072 	0x7030, 0x31000111, 0x00000011,
1073 	0x2f48, 0x73773777, 0x12010001,
1074 	0x220c, 0x00007fb6, 0x0021a1b1,
1075 	0x2210, 0x00007fb6, 0x002021b1,
1076 	0x2180, 0x00007fb6, 0x00002191,
1077 	0x2218, 0x00007fb6, 0x002121b1,
1078 	0x221c, 0x00007fb6, 0x002021b1,
1079 	0x21dc, 0x00007fb6, 0x00002191,
1080 	0x21e0, 0x00007fb6, 0x00002191,
1081 	0x3628, 0x0000003f, 0x0000000a,
1082 	0x362c, 0x0000003f, 0x0000000a,
1083 	0x2ae4, 0x00073ffe, 0x000022a2,
1084 	0x240c, 0x000007ff, 0x00000000,
1085 	0x8a14, 0xf000003f, 0x00000007,
1086 	0x8bf0, 0x00002001, 0x00000001,
1087 	0x8b24, 0xffffffff, 0x00ffffff,
1088 	0x30a04, 0x0000ff0f, 0x00000000,
1089 	0x28a4c, 0x07ffffff, 0x06000000,
1090 	0x4d8, 0x00000fff, 0x00000100,
1091 	0x3e78, 0x00000001, 0x00000002,
1092 	0x9100, 0x03000000, 0x0362c688,
1093 	0x8c00, 0x000000ff, 0x00000001,
1094 	0xe40, 0x00001fff, 0x00001fff,
1095 	0x9060, 0x0000007f, 0x00000020,
1096 	0x9508, 0x00010000, 0x00010000,
1097 	0xac14, 0x000003ff, 0x000000f3,
1098 	0xac0c, 0xffffffff, 0x00001032
1099 };
1100 
1101 static const u32 bonaire_mgcg_cgcg_init[] =
1102 {
1103 	0xc420, 0xffffffff, 0xfffffffc,
1104 	0x30800, 0xffffffff, 0xe0000000,
1105 	0x3c2a0, 0xffffffff, 0x00000100,
1106 	0x3c208, 0xffffffff, 0x00000100,
1107 	0x3c2c0, 0xffffffff, 0xc0000100,
1108 	0x3c2c8, 0xffffffff, 0xc0000100,
1109 	0x3c2c4, 0xffffffff, 0xc0000100,
1110 	0x55e4, 0xffffffff, 0x00600100,
1111 	0x3c280, 0xffffffff, 0x00000100,
1112 	0x3c214, 0xffffffff, 0x06000100,
1113 	0x3c220, 0xffffffff, 0x00000100,
1114 	0x3c218, 0xffffffff, 0x06000100,
1115 	0x3c204, 0xffffffff, 0x00000100,
1116 	0x3c2e0, 0xffffffff, 0x00000100,
1117 	0x3c224, 0xffffffff, 0x00000100,
1118 	0x3c200, 0xffffffff, 0x00000100,
1119 	0x3c230, 0xffffffff, 0x00000100,
1120 	0x3c234, 0xffffffff, 0x00000100,
1121 	0x3c250, 0xffffffff, 0x00000100,
1122 	0x3c254, 0xffffffff, 0x00000100,
1123 	0x3c258, 0xffffffff, 0x00000100,
1124 	0x3c25c, 0xffffffff, 0x00000100,
1125 	0x3c260, 0xffffffff, 0x00000100,
1126 	0x3c27c, 0xffffffff, 0x00000100,
1127 	0x3c278, 0xffffffff, 0x00000100,
1128 	0x3c210, 0xffffffff, 0x06000100,
1129 	0x3c290, 0xffffffff, 0x00000100,
1130 	0x3c274, 0xffffffff, 0x00000100,
1131 	0x3c2b4, 0xffffffff, 0x00000100,
1132 	0x3c2b0, 0xffffffff, 0x00000100,
1133 	0x3c270, 0xffffffff, 0x00000100,
1134 	0x30800, 0xffffffff, 0xe0000000,
1135 	0x3c020, 0xffffffff, 0x00010000,
1136 	0x3c024, 0xffffffff, 0x00030002,
1137 	0x3c028, 0xffffffff, 0x00040007,
1138 	0x3c02c, 0xffffffff, 0x00060005,
1139 	0x3c030, 0xffffffff, 0x00090008,
1140 	0x3c034, 0xffffffff, 0x00010000,
1141 	0x3c038, 0xffffffff, 0x00030002,
1142 	0x3c03c, 0xffffffff, 0x00040007,
1143 	0x3c040, 0xffffffff, 0x00060005,
1144 	0x3c044, 0xffffffff, 0x00090008,
1145 	0x3c048, 0xffffffff, 0x00010000,
1146 	0x3c04c, 0xffffffff, 0x00030002,
1147 	0x3c050, 0xffffffff, 0x00040007,
1148 	0x3c054, 0xffffffff, 0x00060005,
1149 	0x3c058, 0xffffffff, 0x00090008,
1150 	0x3c05c, 0xffffffff, 0x00010000,
1151 	0x3c060, 0xffffffff, 0x00030002,
1152 	0x3c064, 0xffffffff, 0x00040007,
1153 	0x3c068, 0xffffffff, 0x00060005,
1154 	0x3c06c, 0xffffffff, 0x00090008,
1155 	0x3c070, 0xffffffff, 0x00010000,
1156 	0x3c074, 0xffffffff, 0x00030002,
1157 	0x3c078, 0xffffffff, 0x00040007,
1158 	0x3c07c, 0xffffffff, 0x00060005,
1159 	0x3c080, 0xffffffff, 0x00090008,
1160 	0x3c084, 0xffffffff, 0x00010000,
1161 	0x3c088, 0xffffffff, 0x00030002,
1162 	0x3c08c, 0xffffffff, 0x00040007,
1163 	0x3c090, 0xffffffff, 0x00060005,
1164 	0x3c094, 0xffffffff, 0x00090008,
1165 	0x3c098, 0xffffffff, 0x00010000,
1166 	0x3c09c, 0xffffffff, 0x00030002,
1167 	0x3c0a0, 0xffffffff, 0x00040007,
1168 	0x3c0a4, 0xffffffff, 0x00060005,
1169 	0x3c0a8, 0xffffffff, 0x00090008,
1170 	0x3c000, 0xffffffff, 0x96e00200,
1171 	0x8708, 0xffffffff, 0x00900100,
1172 	0xc424, 0xffffffff, 0x0020003f,
1173 	0x38, 0xffffffff, 0x0140001c,
1174 	0x3c, 0x000f0000, 0x000f0000,
1175 	0x220, 0xffffffff, 0xC060000C,
1176 	0x224, 0xc0000fff, 0x00000100,
1177 	0xf90, 0xffffffff, 0x00000100,
1178 	0xf98, 0x00000101, 0x00000000,
1179 	0x20a8, 0xffffffff, 0x00000104,
1180 	0x55e4, 0xff000fff, 0x00000100,
1181 	0x30cc, 0xc0000fff, 0x00000104,
1182 	0xc1e4, 0x00000001, 0x00000001,
1183 	0xd00c, 0xff000ff0, 0x00000100,
1184 	0xd80c, 0xff000ff0, 0x00000100
1185 };
1186 
1187 static const u32 spectre_golden_spm_registers[] =
1188 {
1189 	0x30800, 0xe0ffffff, 0xe0000000
1190 };
1191 
1192 static const u32 spectre_golden_common_registers[] =
1193 {
1194 	0xc770, 0xffffffff, 0x00000800,
1195 	0xc774, 0xffffffff, 0x00000800,
1196 	0xc798, 0xffffffff, 0x00007fbf,
1197 	0xc79c, 0xffffffff, 0x00007faf
1198 };
1199 
1200 static const u32 spectre_golden_registers[] =
1201 {
1202 	0x3c000, 0xffff1fff, 0x96940200,
1203 	0x3c00c, 0xffff0001, 0xff000000,
1204 	0x3c200, 0xfffc0fff, 0x00000100,
1205 	0x6ed8, 0x00010101, 0x00010000,
1206 	0x9834, 0xf00fffff, 0x00000400,
1207 	0x9838, 0xfffffffc, 0x00020200,
1208 	0x5bb0, 0x000000f0, 0x00000070,
1209 	0x5bc0, 0xf0311fff, 0x80300000,
1210 	0x98f8, 0x73773777, 0x12010001,
1211 	0x9b7c, 0x00ff0000, 0x00fc0000,
1212 	0x2f48, 0x73773777, 0x12010001,
1213 	0x8a14, 0xf000003f, 0x00000007,
1214 	0x8b24, 0xffffffff, 0x00ffffff,
1215 	0x28350, 0x3f3f3fff, 0x00000082,
1216 	0x28354, 0x0000003f, 0x00000000,
1217 	0x3e78, 0x00000001, 0x00000002,
1218 	0x913c, 0xffff03df, 0x00000004,
1219 	0xc768, 0x00000008, 0x00000008,
1220 	0x8c00, 0x000008ff, 0x00000800,
1221 	0x9508, 0x00010000, 0x00010000,
1222 	0xac0c, 0xffffffff, 0x54763210,
1223 	0x214f8, 0x01ff01ff, 0x00000002,
1224 	0x21498, 0x007ff800, 0x00200000,
1225 	0x2015c, 0xffffffff, 0x00000f40,
1226 	0x30934, 0xffffffff, 0x00000001
1227 };
1228 
1229 static const u32 spectre_mgcg_cgcg_init[] =
1230 {
1231 	0xc420, 0xffffffff, 0xfffffffc,
1232 	0x30800, 0xffffffff, 0xe0000000,
1233 	0x3c2a0, 0xffffffff, 0x00000100,
1234 	0x3c208, 0xffffffff, 0x00000100,
1235 	0x3c2c0, 0xffffffff, 0x00000100,
1236 	0x3c2c8, 0xffffffff, 0x00000100,
1237 	0x3c2c4, 0xffffffff, 0x00000100,
1238 	0x55e4, 0xffffffff, 0x00600100,
1239 	0x3c280, 0xffffffff, 0x00000100,
1240 	0x3c214, 0xffffffff, 0x06000100,
1241 	0x3c220, 0xffffffff, 0x00000100,
1242 	0x3c218, 0xffffffff, 0x06000100,
1243 	0x3c204, 0xffffffff, 0x00000100,
1244 	0x3c2e0, 0xffffffff, 0x00000100,
1245 	0x3c224, 0xffffffff, 0x00000100,
1246 	0x3c200, 0xffffffff, 0x00000100,
1247 	0x3c230, 0xffffffff, 0x00000100,
1248 	0x3c234, 0xffffffff, 0x00000100,
1249 	0x3c250, 0xffffffff, 0x00000100,
1250 	0x3c254, 0xffffffff, 0x00000100,
1251 	0x3c258, 0xffffffff, 0x00000100,
1252 	0x3c25c, 0xffffffff, 0x00000100,
1253 	0x3c260, 0xffffffff, 0x00000100,
1254 	0x3c27c, 0xffffffff, 0x00000100,
1255 	0x3c278, 0xffffffff, 0x00000100,
1256 	0x3c210, 0xffffffff, 0x06000100,
1257 	0x3c290, 0xffffffff, 0x00000100,
1258 	0x3c274, 0xffffffff, 0x00000100,
1259 	0x3c2b4, 0xffffffff, 0x00000100,
1260 	0x3c2b0, 0xffffffff, 0x00000100,
1261 	0x3c270, 0xffffffff, 0x00000100,
1262 	0x30800, 0xffffffff, 0xe0000000,
1263 	0x3c020, 0xffffffff, 0x00010000,
1264 	0x3c024, 0xffffffff, 0x00030002,
1265 	0x3c028, 0xffffffff, 0x00040007,
1266 	0x3c02c, 0xffffffff, 0x00060005,
1267 	0x3c030, 0xffffffff, 0x00090008,
1268 	0x3c034, 0xffffffff, 0x00010000,
1269 	0x3c038, 0xffffffff, 0x00030002,
1270 	0x3c03c, 0xffffffff, 0x00040007,
1271 	0x3c040, 0xffffffff, 0x00060005,
1272 	0x3c044, 0xffffffff, 0x00090008,
1273 	0x3c048, 0xffffffff, 0x00010000,
1274 	0x3c04c, 0xffffffff, 0x00030002,
1275 	0x3c050, 0xffffffff, 0x00040007,
1276 	0x3c054, 0xffffffff, 0x00060005,
1277 	0x3c058, 0xffffffff, 0x00090008,
1278 	0x3c05c, 0xffffffff, 0x00010000,
1279 	0x3c060, 0xffffffff, 0x00030002,
1280 	0x3c064, 0xffffffff, 0x00040007,
1281 	0x3c068, 0xffffffff, 0x00060005,
1282 	0x3c06c, 0xffffffff, 0x00090008,
1283 	0x3c070, 0xffffffff, 0x00010000,
1284 	0x3c074, 0xffffffff, 0x00030002,
1285 	0x3c078, 0xffffffff, 0x00040007,
1286 	0x3c07c, 0xffffffff, 0x00060005,
1287 	0x3c080, 0xffffffff, 0x00090008,
1288 	0x3c084, 0xffffffff, 0x00010000,
1289 	0x3c088, 0xffffffff, 0x00030002,
1290 	0x3c08c, 0xffffffff, 0x00040007,
1291 	0x3c090, 0xffffffff, 0x00060005,
1292 	0x3c094, 0xffffffff, 0x00090008,
1293 	0x3c098, 0xffffffff, 0x00010000,
1294 	0x3c09c, 0xffffffff, 0x00030002,
1295 	0x3c0a0, 0xffffffff, 0x00040007,
1296 	0x3c0a4, 0xffffffff, 0x00060005,
1297 	0x3c0a8, 0xffffffff, 0x00090008,
1298 	0x3c0ac, 0xffffffff, 0x00010000,
1299 	0x3c0b0, 0xffffffff, 0x00030002,
1300 	0x3c0b4, 0xffffffff, 0x00040007,
1301 	0x3c0b8, 0xffffffff, 0x00060005,
1302 	0x3c0bc, 0xffffffff, 0x00090008,
1303 	0x3c000, 0xffffffff, 0x96e00200,
1304 	0x8708, 0xffffffff, 0x00900100,
1305 	0xc424, 0xffffffff, 0x0020003f,
1306 	0x38, 0xffffffff, 0x0140001c,
1307 	0x3c, 0x000f0000, 0x000f0000,
1308 	0x220, 0xffffffff, 0xC060000C,
1309 	0x224, 0xc0000fff, 0x00000100,
1310 	0xf90, 0xffffffff, 0x00000100,
1311 	0xf98, 0x00000101, 0x00000000,
1312 	0x20a8, 0xffffffff, 0x00000104,
1313 	0x55e4, 0xff000fff, 0x00000100,
1314 	0x30cc, 0xc0000fff, 0x00000104,
1315 	0xc1e4, 0x00000001, 0x00000001,
1316 	0xd00c, 0xff000ff0, 0x00000100,
1317 	0xd80c, 0xff000ff0, 0x00000100
1318 };
1319 
1320 static const u32 kalindi_golden_spm_registers[] =
1321 {
1322 	0x30800, 0xe0ffffff, 0xe0000000
1323 };
1324 
1325 static const u32 kalindi_golden_common_registers[] =
1326 {
1327 	0xc770, 0xffffffff, 0x00000800,
1328 	0xc774, 0xffffffff, 0x00000800,
1329 	0xc798, 0xffffffff, 0x00007fbf,
1330 	0xc79c, 0xffffffff, 0x00007faf
1331 };
1332 
1333 static const u32 kalindi_golden_registers[] =
1334 {
1335 	0x3c000, 0xffffdfff, 0x6e944040,
1336 	0x55e4, 0xff607fff, 0xfc000100,
1337 	0x3c220, 0xff000fff, 0x00000100,
1338 	0x3c224, 0xff000fff, 0x00000100,
1339 	0x3c200, 0xfffc0fff, 0x00000100,
1340 	0x6ed8, 0x00010101, 0x00010000,
1341 	0x9830, 0xffffffff, 0x00000000,
1342 	0x9834, 0xf00fffff, 0x00000400,
1343 	0x5bb0, 0x000000f0, 0x00000070,
1344 	0x5bc0, 0xf0311fff, 0x80300000,
1345 	0x98f8, 0x73773777, 0x12010001,
1346 	0x98fc, 0xffffffff, 0x00000010,
1347 	0x9b7c, 0x00ff0000, 0x00fc0000,
1348 	0x8030, 0x00001f0f, 0x0000100a,
1349 	0x2f48, 0x73773777, 0x12010001,
1350 	0x2408, 0x000fffff, 0x000c007f,
1351 	0x8a14, 0xf000003f, 0x00000007,
1352 	0x8b24, 0x3fff3fff, 0x00ffcfff,
1353 	0x30a04, 0x0000ff0f, 0x00000000,
1354 	0x28a4c, 0x07ffffff, 0x06000000,
1355 	0x4d8, 0x00000fff, 0x00000100,
1356 	0x3e78, 0x00000001, 0x00000002,
1357 	0xc768, 0x00000008, 0x00000008,
1358 	0x8c00, 0x000000ff, 0x00000003,
1359 	0x214f8, 0x01ff01ff, 0x00000002,
1360 	0x21498, 0x007ff800, 0x00200000,
1361 	0x2015c, 0xffffffff, 0x00000f40,
1362 	0x88c4, 0x001f3ae3, 0x00000082,
1363 	0x88d4, 0x0000001f, 0x00000010,
1364 	0x30934, 0xffffffff, 0x00000000
1365 };
1366 
1367 static const u32 kalindi_mgcg_cgcg_init[] =
1368 {
1369 	0xc420, 0xffffffff, 0xfffffffc,
1370 	0x30800, 0xffffffff, 0xe0000000,
1371 	0x3c2a0, 0xffffffff, 0x00000100,
1372 	0x3c208, 0xffffffff, 0x00000100,
1373 	0x3c2c0, 0xffffffff, 0x00000100,
1374 	0x3c2c8, 0xffffffff, 0x00000100,
1375 	0x3c2c4, 0xffffffff, 0x00000100,
1376 	0x55e4, 0xffffffff, 0x00600100,
1377 	0x3c280, 0xffffffff, 0x00000100,
1378 	0x3c214, 0xffffffff, 0x06000100,
1379 	0x3c220, 0xffffffff, 0x00000100,
1380 	0x3c218, 0xffffffff, 0x06000100,
1381 	0x3c204, 0xffffffff, 0x00000100,
1382 	0x3c2e0, 0xffffffff, 0x00000100,
1383 	0x3c224, 0xffffffff, 0x00000100,
1384 	0x3c200, 0xffffffff, 0x00000100,
1385 	0x3c230, 0xffffffff, 0x00000100,
1386 	0x3c234, 0xffffffff, 0x00000100,
1387 	0x3c250, 0xffffffff, 0x00000100,
1388 	0x3c254, 0xffffffff, 0x00000100,
1389 	0x3c258, 0xffffffff, 0x00000100,
1390 	0x3c25c, 0xffffffff, 0x00000100,
1391 	0x3c260, 0xffffffff, 0x00000100,
1392 	0x3c27c, 0xffffffff, 0x00000100,
1393 	0x3c278, 0xffffffff, 0x00000100,
1394 	0x3c210, 0xffffffff, 0x06000100,
1395 	0x3c290, 0xffffffff, 0x00000100,
1396 	0x3c274, 0xffffffff, 0x00000100,
1397 	0x3c2b4, 0xffffffff, 0x00000100,
1398 	0x3c2b0, 0xffffffff, 0x00000100,
1399 	0x3c270, 0xffffffff, 0x00000100,
1400 	0x30800, 0xffffffff, 0xe0000000,
1401 	0x3c020, 0xffffffff, 0x00010000,
1402 	0x3c024, 0xffffffff, 0x00030002,
1403 	0x3c028, 0xffffffff, 0x00040007,
1404 	0x3c02c, 0xffffffff, 0x00060005,
1405 	0x3c030, 0xffffffff, 0x00090008,
1406 	0x3c034, 0xffffffff, 0x00010000,
1407 	0x3c038, 0xffffffff, 0x00030002,
1408 	0x3c03c, 0xffffffff, 0x00040007,
1409 	0x3c040, 0xffffffff, 0x00060005,
1410 	0x3c044, 0xffffffff, 0x00090008,
1411 	0x3c000, 0xffffffff, 0x96e00200,
1412 	0x8708, 0xffffffff, 0x00900100,
1413 	0xc424, 0xffffffff, 0x0020003f,
1414 	0x38, 0xffffffff, 0x0140001c,
1415 	0x3c, 0x000f0000, 0x000f0000,
1416 	0x220, 0xffffffff, 0xC060000C,
1417 	0x224, 0xc0000fff, 0x00000100,
1418 	0x20a8, 0xffffffff, 0x00000104,
1419 	0x55e4, 0xff000fff, 0x00000100,
1420 	0x30cc, 0xc0000fff, 0x00000104,
1421 	0xc1e4, 0x00000001, 0x00000001,
1422 	0xd00c, 0xff000ff0, 0x00000100,
1423 	0xd80c, 0xff000ff0, 0x00000100
1424 };
1425 
1426 static const u32 hawaii_golden_spm_registers[] =
1427 {
1428 	0x30800, 0xe0ffffff, 0xe0000000
1429 };
1430 
1431 static const u32 hawaii_golden_common_registers[] =
1432 {
1433 	0x30800, 0xffffffff, 0xe0000000,
1434 	0x28350, 0xffffffff, 0x3a00161a,
1435 	0x28354, 0xffffffff, 0x0000002e,
1436 	0x9a10, 0xffffffff, 0x00018208,
1437 	0x98f8, 0xffffffff, 0x12011003
1438 };
1439 
1440 static const u32 hawaii_golden_registers[] =
1441 {
1442 	0x3354, 0x00000333, 0x00000333,
1443 	0x9a10, 0x00010000, 0x00058208,
1444 	0x9830, 0xffffffff, 0x00000000,
1445 	0x9834, 0xf00fffff, 0x00000400,
1446 	0x9838, 0x0002021c, 0x00020200,
1447 	0xc78, 0x00000080, 0x00000000,
1448 	0x5bb0, 0x000000f0, 0x00000070,
1449 	0x5bc0, 0xf0311fff, 0x80300000,
1450 	0x350c, 0x00810000, 0x408af000,
1451 	0x7030, 0x31000111, 0x00000011,
1452 	0x2f48, 0x73773777, 0x12010001,
1453 	0x2120, 0x0000007f, 0x0000001b,
1454 	0x21dc, 0x00007fb6, 0x00002191,
1455 	0x3628, 0x0000003f, 0x0000000a,
1456 	0x362c, 0x0000003f, 0x0000000a,
1457 	0x2ae4, 0x00073ffe, 0x000022a2,
1458 	0x240c, 0x000007ff, 0x00000000,
1459 	0x8bf0, 0x00002001, 0x00000001,
1460 	0x8b24, 0xffffffff, 0x00ffffff,
1461 	0x30a04, 0x0000ff0f, 0x00000000,
1462 	0x28a4c, 0x07ffffff, 0x06000000,
1463 	0x3e78, 0x00000001, 0x00000002,
1464 	0xc768, 0x00000008, 0x00000008,
1465 	0xc770, 0x00000f00, 0x00000800,
1466 	0xc774, 0x00000f00, 0x00000800,
1467 	0xc798, 0x00ffffff, 0x00ff7fbf,
1468 	0xc79c, 0x00ffffff, 0x00ff7faf,
1469 	0x8c00, 0x000000ff, 0x00000800,
1470 	0xe40, 0x00001fff, 0x00001fff,
1471 	0x9060, 0x0000007f, 0x00000020,
1472 	0x9508, 0x00010000, 0x00010000,
1473 	0xae00, 0x00100000, 0x000ff07c,
1474 	0xac14, 0x000003ff, 0x0000000f,
1475 	0xac10, 0xffffffff, 0x7564fdec,
1476 	0xac0c, 0xffffffff, 0x3120b9a8,
1477 	0xac08, 0x20000000, 0x0f9c0000
1478 };
1479 
1480 static const u32 hawaii_mgcg_cgcg_init[] =
1481 {
1482 	0xc420, 0xffffffff, 0xfffffffd,
1483 	0x30800, 0xffffffff, 0xe0000000,
1484 	0x3c2a0, 0xffffffff, 0x00000100,
1485 	0x3c208, 0xffffffff, 0x00000100,
1486 	0x3c2c0, 0xffffffff, 0x00000100,
1487 	0x3c2c8, 0xffffffff, 0x00000100,
1488 	0x3c2c4, 0xffffffff, 0x00000100,
1489 	0x55e4, 0xffffffff, 0x00200100,
1490 	0x3c280, 0xffffffff, 0x00000100,
1491 	0x3c214, 0xffffffff, 0x06000100,
1492 	0x3c220, 0xffffffff, 0x00000100,
1493 	0x3c218, 0xffffffff, 0x06000100,
1494 	0x3c204, 0xffffffff, 0x00000100,
1495 	0x3c2e0, 0xffffffff, 0x00000100,
1496 	0x3c224, 0xffffffff, 0x00000100,
1497 	0x3c200, 0xffffffff, 0x00000100,
1498 	0x3c230, 0xffffffff, 0x00000100,
1499 	0x3c234, 0xffffffff, 0x00000100,
1500 	0x3c250, 0xffffffff, 0x00000100,
1501 	0x3c254, 0xffffffff, 0x00000100,
1502 	0x3c258, 0xffffffff, 0x00000100,
1503 	0x3c25c, 0xffffffff, 0x00000100,
1504 	0x3c260, 0xffffffff, 0x00000100,
1505 	0x3c27c, 0xffffffff, 0x00000100,
1506 	0x3c278, 0xffffffff, 0x00000100,
1507 	0x3c210, 0xffffffff, 0x06000100,
1508 	0x3c290, 0xffffffff, 0x00000100,
1509 	0x3c274, 0xffffffff, 0x00000100,
1510 	0x3c2b4, 0xffffffff, 0x00000100,
1511 	0x3c2b0, 0xffffffff, 0x00000100,
1512 	0x3c270, 0xffffffff, 0x00000100,
1513 	0x30800, 0xffffffff, 0xe0000000,
1514 	0x3c020, 0xffffffff, 0x00010000,
1515 	0x3c024, 0xffffffff, 0x00030002,
1516 	0x3c028, 0xffffffff, 0x00040007,
1517 	0x3c02c, 0xffffffff, 0x00060005,
1518 	0x3c030, 0xffffffff, 0x00090008,
1519 	0x3c034, 0xffffffff, 0x00010000,
1520 	0x3c038, 0xffffffff, 0x00030002,
1521 	0x3c03c, 0xffffffff, 0x00040007,
1522 	0x3c040, 0xffffffff, 0x00060005,
1523 	0x3c044, 0xffffffff, 0x00090008,
1524 	0x3c048, 0xffffffff, 0x00010000,
1525 	0x3c04c, 0xffffffff, 0x00030002,
1526 	0x3c050, 0xffffffff, 0x00040007,
1527 	0x3c054, 0xffffffff, 0x00060005,
1528 	0x3c058, 0xffffffff, 0x00090008,
1529 	0x3c05c, 0xffffffff, 0x00010000,
1530 	0x3c060, 0xffffffff, 0x00030002,
1531 	0x3c064, 0xffffffff, 0x00040007,
1532 	0x3c068, 0xffffffff, 0x00060005,
1533 	0x3c06c, 0xffffffff, 0x00090008,
1534 	0x3c070, 0xffffffff, 0x00010000,
1535 	0x3c074, 0xffffffff, 0x00030002,
1536 	0x3c078, 0xffffffff, 0x00040007,
1537 	0x3c07c, 0xffffffff, 0x00060005,
1538 	0x3c080, 0xffffffff, 0x00090008,
1539 	0x3c084, 0xffffffff, 0x00010000,
1540 	0x3c088, 0xffffffff, 0x00030002,
1541 	0x3c08c, 0xffffffff, 0x00040007,
1542 	0x3c090, 0xffffffff, 0x00060005,
1543 	0x3c094, 0xffffffff, 0x00090008,
1544 	0x3c098, 0xffffffff, 0x00010000,
1545 	0x3c09c, 0xffffffff, 0x00030002,
1546 	0x3c0a0, 0xffffffff, 0x00040007,
1547 	0x3c0a4, 0xffffffff, 0x00060005,
1548 	0x3c0a8, 0xffffffff, 0x00090008,
1549 	0x3c0ac, 0xffffffff, 0x00010000,
1550 	0x3c0b0, 0xffffffff, 0x00030002,
1551 	0x3c0b4, 0xffffffff, 0x00040007,
1552 	0x3c0b8, 0xffffffff, 0x00060005,
1553 	0x3c0bc, 0xffffffff, 0x00090008,
1554 	0x3c0c0, 0xffffffff, 0x00010000,
1555 	0x3c0c4, 0xffffffff, 0x00030002,
1556 	0x3c0c8, 0xffffffff, 0x00040007,
1557 	0x3c0cc, 0xffffffff, 0x00060005,
1558 	0x3c0d0, 0xffffffff, 0x00090008,
1559 	0x3c0d4, 0xffffffff, 0x00010000,
1560 	0x3c0d8, 0xffffffff, 0x00030002,
1561 	0x3c0dc, 0xffffffff, 0x00040007,
1562 	0x3c0e0, 0xffffffff, 0x00060005,
1563 	0x3c0e4, 0xffffffff, 0x00090008,
1564 	0x3c0e8, 0xffffffff, 0x00010000,
1565 	0x3c0ec, 0xffffffff, 0x00030002,
1566 	0x3c0f0, 0xffffffff, 0x00040007,
1567 	0x3c0f4, 0xffffffff, 0x00060005,
1568 	0x3c0f8, 0xffffffff, 0x00090008,
1569 	0xc318, 0xffffffff, 0x00020200,
1570 	0x3350, 0xffffffff, 0x00000200,
1571 	0x15c0, 0xffffffff, 0x00000400,
1572 	0x55e8, 0xffffffff, 0x00000000,
1573 	0x2f50, 0xffffffff, 0x00000902,
1574 	0x3c000, 0xffffffff, 0x96940200,
1575 	0x8708, 0xffffffff, 0x00900100,
1576 	0xc424, 0xffffffff, 0x0020003f,
1577 	0x38, 0xffffffff, 0x0140001c,
1578 	0x3c, 0x000f0000, 0x000f0000,
1579 	0x220, 0xffffffff, 0xc060000c,
1580 	0x224, 0xc0000fff, 0x00000100,
1581 	0xf90, 0xffffffff, 0x00000100,
1582 	0xf98, 0x00000101, 0x00000000,
1583 	0x20a8, 0xffffffff, 0x00000104,
1584 	0x55e4, 0xff000fff, 0x00000100,
1585 	0x30cc, 0xc0000fff, 0x00000104,
1586 	0xc1e4, 0x00000001, 0x00000001,
1587 	0xd00c, 0xff000ff0, 0x00000100,
1588 	0xd80c, 0xff000ff0, 0x00000100
1589 };
1590 
1591 static const u32 godavari_golden_registers[] =
1592 {
1593 	0x55e4, 0xff607fff, 0xfc000100,
1594 	0x6ed8, 0x00010101, 0x00010000,
1595 	0x9830, 0xffffffff, 0x00000000,
1596 	0x98302, 0xf00fffff, 0x00000400,
1597 	0x6130, 0xffffffff, 0x00010000,
1598 	0x5bb0, 0x000000f0, 0x00000070,
1599 	0x5bc0, 0xf0311fff, 0x80300000,
1600 	0x98f8, 0x73773777, 0x12010001,
1601 	0x98fc, 0xffffffff, 0x00000010,
1602 	0x8030, 0x00001f0f, 0x0000100a,
1603 	0x2f48, 0x73773777, 0x12010001,
1604 	0x2408, 0x000fffff, 0x000c007f,
1605 	0x8a14, 0xf000003f, 0x00000007,
1606 	0x8b24, 0xffffffff, 0x00ff0fff,
1607 	0x30a04, 0x0000ff0f, 0x00000000,
1608 	0x28a4c, 0x07ffffff, 0x06000000,
1609 	0x4d8, 0x00000fff, 0x00000100,
1610 	0xd014, 0x00010000, 0x00810001,
1611 	0xd814, 0x00010000, 0x00810001,
1612 	0x3e78, 0x00000001, 0x00000002,
1613 	0xc768, 0x00000008, 0x00000008,
1614 	0xc770, 0x00000f00, 0x00000800,
1615 	0xc774, 0x00000f00, 0x00000800,
1616 	0xc798, 0x00ffffff, 0x00ff7fbf,
1617 	0xc79c, 0x00ffffff, 0x00ff7faf,
1618 	0x8c00, 0x000000ff, 0x00000001,
1619 	0x214f8, 0x01ff01ff, 0x00000002,
1620 	0x21498, 0x007ff800, 0x00200000,
1621 	0x2015c, 0xffffffff, 0x00000f40,
1622 	0x88c4, 0x001f3ae3, 0x00000082,
1623 	0x88d4, 0x0000001f, 0x00000010,
1624 	0x30934, 0xffffffff, 0x00000000
1625 };
1626 
1627 
1628 static void cik_init_golden_registers(struct radeon_device *rdev)
1629 {
1630 	switch (rdev->family) {
1631 	case CHIP_BONAIRE:
1632 		radeon_program_register_sequence(rdev,
1633 						 bonaire_mgcg_cgcg_init,
1634 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1635 		radeon_program_register_sequence(rdev,
1636 						 bonaire_golden_registers,
1637 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1638 		radeon_program_register_sequence(rdev,
1639 						 bonaire_golden_common_registers,
1640 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1641 		radeon_program_register_sequence(rdev,
1642 						 bonaire_golden_spm_registers,
1643 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1644 		break;
1645 	case CHIP_KABINI:
1646 		radeon_program_register_sequence(rdev,
1647 						 kalindi_mgcg_cgcg_init,
1648 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1649 		radeon_program_register_sequence(rdev,
1650 						 kalindi_golden_registers,
1651 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1652 		radeon_program_register_sequence(rdev,
1653 						 kalindi_golden_common_registers,
1654 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1655 		radeon_program_register_sequence(rdev,
1656 						 kalindi_golden_spm_registers,
1657 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1658 		break;
1659 	case CHIP_MULLINS:
1660 		radeon_program_register_sequence(rdev,
1661 						 kalindi_mgcg_cgcg_init,
1662 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1663 		radeon_program_register_sequence(rdev,
1664 						 godavari_golden_registers,
1665 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1666 		radeon_program_register_sequence(rdev,
1667 						 kalindi_golden_common_registers,
1668 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1669 		radeon_program_register_sequence(rdev,
1670 						 kalindi_golden_spm_registers,
1671 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1672 		break;
1673 	case CHIP_KAVERI:
1674 		radeon_program_register_sequence(rdev,
1675 						 spectre_mgcg_cgcg_init,
1676 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1677 		radeon_program_register_sequence(rdev,
1678 						 spectre_golden_registers,
1679 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1680 		radeon_program_register_sequence(rdev,
1681 						 spectre_golden_common_registers,
1682 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1683 		radeon_program_register_sequence(rdev,
1684 						 spectre_golden_spm_registers,
1685 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1686 		break;
1687 	case CHIP_HAWAII:
1688 		radeon_program_register_sequence(rdev,
1689 						 hawaii_mgcg_cgcg_init,
1690 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1691 		radeon_program_register_sequence(rdev,
1692 						 hawaii_golden_registers,
1693 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1694 		radeon_program_register_sequence(rdev,
1695 						 hawaii_golden_common_registers,
1696 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1697 		radeon_program_register_sequence(rdev,
1698 						 hawaii_golden_spm_registers,
1699 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1700 		break;
1701 	default:
1702 		break;
1703 	}
1704 }
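/*
 * Editorial sketch: a hand-rolled golden triple.  The table is
 * hypothetical and only restates the {offset, and_mask, or_mask}
 * encoding consumed by radeon_program_register_sequence(); the
 * example row mirrors an entry from the bonaire table above.
 */
static const u32 example_golden_registers[] =
{
	/* offset, and_mask, or_mask: clear bit 7 of 0xc78 */
	0xc78, 0x00000080, 0x00000000
};

static void __maybe_unused cik_example_program_golden(struct radeon_device *rdev)
{
	radeon_program_register_sequence(rdev,
					 example_golden_registers,
					 (const u32)ARRAY_SIZE(example_golden_registers));
}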
1705 
1706 /**
1707  * cik_get_xclk - get the xclk
1708  *
1709  * @rdev: radeon_device pointer
1710  *
1711  * Returns the reference clock used by the gfx engine
1712  * (CIK).
1713  */
1714 u32 cik_get_xclk(struct radeon_device *rdev)
1715 {
1716 	u32 reference_clock = rdev->clock.spll.reference_freq;
1717 
1718 	if (rdev->flags & RADEON_IS_IGP) {
1719 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1720 			return reference_clock / 2;
1721 	} else {
1722 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1723 			return reference_clock / 4;
1724 	}
1725 	return reference_clock;
1726 }
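/*
 * Editorial sketch: like the other ATOM-derived clocks in this
 * driver, the value returned above is assumed to be in 10 kHz units
 * (that convention is an assumption stated here, not something this
 * file spells out), so converting to Hz is a single multiply.
 */
static u32 __maybe_unused cik_example_xclk_hz(struct radeon_device *rdev)
{
	return cik_get_xclk(rdev) * 10000;	/* 10 kHz units -> Hz */
}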
1727 
1728 /**
1729  * cik_mm_rdoorbell - read a doorbell dword
1730  *
1731  * @rdev: radeon_device pointer
1732  * @index: doorbell index
1733  *
1734  * Returns the value in the doorbell aperture at the
1735  * requested doorbell index (CIK).
1736  */
1737 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1738 {
1739 	if (index < rdev->doorbell.num_doorbells) {
1740 		return readl(rdev->doorbell.ptr + index);
1741 	} else {
1742 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1743 		return 0;
1744 	}
1745 }
1746 
1747 /**
1748  * cik_mm_wdoorbell - write a doorbell dword
1749  *
1750  * @rdev: radeon_device pointer
1751  * @index: doorbell index
1752  * @v: value to write
1753  *
1754  * Writes @v to the doorbell aperture at the
1755  * requested doorbell index (CIK).
1756  */
1757 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1758 {
1759 	if (index < rdev->doorbell.num_doorbells) {
1760 		writel(v, rdev->doorbell.ptr + index);
1761 	} else {
1762 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1763 	}
1764 }
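/*
 * Editorial sketch: this mirrors how the compute rings commit work in
 * this driver (cik_compute_ring_set_wptr() uses the same fields); the
 * helper itself is illustrative only.
 */
static void __maybe_unused cik_example_ring_doorbell(struct radeon_device *rdev,
						     struct radeon_ring *ring)
{
	/* publish the new write pointer to the hw via its doorbell */
	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
}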
1765 
1766 #define BONAIRE_IO_MC_REGS_SIZE 36
1767 
1768 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1769 {
1770 	{0x00000070, 0x04400000},
1771 	{0x00000071, 0x80c01803},
1772 	{0x00000072, 0x00004004},
1773 	{0x00000073, 0x00000100},
1774 	{0x00000074, 0x00ff0000},
1775 	{0x00000075, 0x34000000},
1776 	{0x00000076, 0x08000014},
1777 	{0x00000077, 0x00cc08ec},
1778 	{0x00000078, 0x00000400},
1779 	{0x00000079, 0x00000000},
1780 	{0x0000007a, 0x04090000},
1781 	{0x0000007c, 0x00000000},
1782 	{0x0000007e, 0x4408a8e8},
1783 	{0x0000007f, 0x00000304},
1784 	{0x00000080, 0x00000000},
1785 	{0x00000082, 0x00000001},
1786 	{0x00000083, 0x00000002},
1787 	{0x00000084, 0xf3e4f400},
1788 	{0x00000085, 0x052024e3},
1789 	{0x00000087, 0x00000000},
1790 	{0x00000088, 0x01000000},
1791 	{0x0000008a, 0x1c0a0000},
1792 	{0x0000008b, 0xff010000},
1793 	{0x0000008d, 0xffffefff},
1794 	{0x0000008e, 0xfff3efff},
1795 	{0x0000008f, 0xfff3efbf},
1796 	{0x00000092, 0xf7ffffff},
1797 	{0x00000093, 0xffffff7f},
1798 	{0x00000095, 0x00101101},
1799 	{0x00000096, 0x00000fff},
1800 	{0x00000097, 0x00116fff},
1801 	{0x00000098, 0x60010000},
1802 	{0x00000099, 0x10010000},
1803 	{0x0000009a, 0x00006000},
1804 	{0x0000009b, 0x00001000},
1805 	{0x0000009f, 0x00b48000}
1806 };
1807 
1808 #define HAWAII_IO_MC_REGS_SIZE 22
1809 
1810 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1811 {
1812 	{0x0000007d, 0x40000000},
1813 	{0x0000007e, 0x40180304},
1814 	{0x0000007f, 0x0000ff00},
1815 	{0x00000081, 0x00000000},
1816 	{0x00000083, 0x00000800},
1817 	{0x00000086, 0x00000000},
1818 	{0x00000087, 0x00000100},
1819 	{0x00000088, 0x00020100},
1820 	{0x00000089, 0x00000000},
1821 	{0x0000008b, 0x00040000},
1822 	{0x0000008c, 0x00000100},
1823 	{0x0000008e, 0xff010000},
1824 	{0x00000090, 0xffffefff},
1825 	{0x00000091, 0xfff3efff},
1826 	{0x00000092, 0xfff3efbf},
1827 	{0x00000093, 0xf7ffffff},
1828 	{0x00000094, 0xffffff7f},
1829 	{0x00000095, 0x00000fff},
1830 	{0x00000096, 0x00116fff},
1831 	{0x00000097, 0x60010000},
1832 	{0x00000098, 0x10010000},
1833 	{0x0000009f, 0x00c79000}
1834 };
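
/*
 * Descriptive note: each row in the two io_mc_regs tables above is an
 * {index, data} pair; ci_mc_load_microcode() below writes the first
 * element to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA.
 */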
1835 
1836 
1837 /**
1838  * cik_srbm_select - select specific register instances
1839  *
1840  * @rdev: radeon_device pointer
1841  * @me: selected ME (micro engine)
1842  * @pipe: pipe within the selected ME
1843  * @queue: queue within the selected pipe
1844  * @vmid: VMID
1845  *
1846  * Switches the currently active register instances.  Some
1847  * registers are instanced per VMID, others are instanced per
1848  * me/pipe/queue combination.
1849  */
1850 static void cik_srbm_select(struct radeon_device *rdev,
1851 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1852 {
1853 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1854 			     MEID(me & 0x3) |
1855 			     VMID(vmid & 0xf) |
1856 			     QUEUEID(queue & 0x7));
1857 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1858 }
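
/*
 * Typical calling pattern (a sketch): hold srbm_mutex, select the
 * instance, program the instanced registers, then restore the default
 * instance:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */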
1859 
1860 /* ucode loading */
1861 /**
1862  * ci_mc_load_microcode - load MC ucode into the hw
1863  *
1864  * @rdev: radeon_device pointer
1865  *
1866  * Load the GDDR MC ucode into the hw (CIK).
1867  * Returns 0 on success, error on failure.
1868  */
1869 int ci_mc_load_microcode(struct radeon_device *rdev)
1870 {
1871 	const __be32 *fw_data = NULL;
1872 	const __le32 *new_fw_data = NULL;
1873 	u32 running, tmp;
1874 	u32 *io_mc_regs = NULL;
1875 	const __le32 *new_io_mc_regs = NULL;
1876 	int i, regs_size, ucode_size;
1877 
1878 	if (!rdev->mc_fw)
1879 		return -EINVAL;
1880 
1881 	if (rdev->new_fw) {
1882 		const struct mc_firmware_header_v1_0 *hdr =
1883 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1884 
1885 		radeon_ucode_print_mc_hdr(&hdr->header);
1886 
1887 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1888 		new_io_mc_regs = (const __le32 *)
1889 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1890 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1891 		new_fw_data = (const __le32 *)
1892 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1893 	} else {
1894 		ucode_size = rdev->mc_fw->size / 4;
1895 
1896 		switch (rdev->family) {
1897 		case CHIP_BONAIRE:
1898 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1899 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1900 			break;
1901 		case CHIP_HAWAII:
1902 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1903 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1904 			break;
1905 		default:
1906 			return -EINVAL;
1907 		}
1908 		fw_data = (const __be32 *)rdev->mc_fw->data;
1909 	}
1910 
1911 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1912 
1913 	if (running == 0) {
1914 		/* reset the engine and set to writable */
1915 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1916 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1917 
1918 		/* load mc io regs */
1919 		for (i = 0; i < regs_size; i++) {
1920 			if (rdev->new_fw) {
1921 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1922 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1923 			} else {
1924 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1925 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1926 			}
1927 		}
1928 
1929 		tmp = RREG32(MC_SEQ_MISC0);
1930 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1931 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1932 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1933 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1934 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1935 		}
1936 
1937 		/* load the MC ucode */
1938 		for (i = 0; i < ucode_size; i++) {
1939 			if (rdev->new_fw)
1940 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1941 			else
1942 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1943 		}
1944 
1945 		/* put the engine back into the active state */
1946 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1947 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1948 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1949 
1950 		/* wait for training to complete */
1951 		for (i = 0; i < rdev->usec_timeout; i++) {
1952 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1953 				break;
1954 			udelay(1);
1955 		}
1956 		for (i = 0; i < rdev->usec_timeout; i++) {
1957 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1958 				break;
1959 			udelay(1);
1960 		}
1961 	}
1962 
1963 	return 0;
1964 }
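
/*
 * Descriptive note: the sequence above only runs when the MC engine is
 * idle (RUN_MASK clear): reset the engine, replay the {index, data}
 * io debug pairs, stream the ucode words into MC_SEQ_SUP_PGM, restart
 * the engine, then poll for memory training completion on both
 * channels (TRAIN_DONE_D0/D1).
 */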
1965 
1966 /**
1967  * cik_init_microcode - load ucode images from disk
1968  *
1969  * @rdev: radeon_device pointer
1970  *
1971  * Use the firmware interface to load the ucode images into
1972  * the driver (not loaded into hw).
1973  * Returns 0 on success, error on failure.
1974  */
1975 static int cik_init_microcode(struct radeon_device *rdev)
1976 {
1977 	const char *chip_name;
1978 	const char *new_chip_name;
1979 	size_t pfp_req_size, me_req_size, ce_req_size,
1980 		mec_req_size, rlc_req_size, mc_req_size = 0,
1981 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1982 	char fw_name[30];
1983 	int new_fw = 0;
1984 	int err;
1985 	int num_fw;
1986 	bool new_smc = false;
1987 
1988 	DRM_DEBUG("\n");
1989 
1990 	switch (rdev->family) {
1991 	case CHIP_BONAIRE:
1992 		chip_name = "BONAIRE";
1993 		if ((rdev->pdev->revision == 0x80) ||
1994 		    (rdev->pdev->revision == 0x81) ||
1995 		    (rdev->pdev->device == 0x665f))
1996 			new_smc = true;
1997 		new_chip_name = "bonaire";
1998 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1999 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2000 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2001 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2002 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2003 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2004 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2005 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2006 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2007 		num_fw = 8;
2008 		break;
2009 	case CHIP_HAWAII:
2010 		chip_name = "HAWAII";
2011 		if (rdev->pdev->revision == 0x80)
2012 			new_smc = true;
2013 		new_chip_name = "hawaii";
2014 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023 		num_fw = 8;
2024 		break;
2025 	case CHIP_KAVERI:
2026 		chip_name = "KAVERI";
2027 		new_chip_name = "kaveri";
2028 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034 		num_fw = 7;
2035 		break;
2036 	case CHIP_KABINI:
2037 		chip_name = "KABINI";
2038 		new_chip_name = "kabini";
2039 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045 		num_fw = 6;
2046 		break;
2047 	case CHIP_MULLINS:
2048 		chip_name = "MULLINS";
2049 		new_chip_name = "mullins";
2050 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056 		num_fw = 6;
2057 		break;
2058	default:
		BUG();
2059 	}
2060 
2061 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062 
2063 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065 	if (err) {
2066 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068 		if (err)
2069 			goto out;
2070 		if (rdev->pfp_fw->size != pfp_req_size) {
2071 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2072 			       rdev->pfp_fw->size, fw_name);
2073 			err = -EINVAL;
2074 			goto out;
2075 		}
2076 	} else {
2077 		err = radeon_ucode_validate(rdev->pfp_fw);
2078 		if (err) {
2079 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2080 			       fw_name);
2081 			goto out;
2082 		} else {
2083 			new_fw++;
2084 		}
2085 	}
2086 
2087 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2088 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2089 	if (err) {
2090 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2091 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092 		if (err)
2093 			goto out;
2094 		if (rdev->me_fw->size != me_req_size) {
2095 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2096 			       rdev->me_fw->size, fw_name);
2097			err = -EINVAL;
			goto out;
2098		}
2099 	} else {
2100 		err = radeon_ucode_validate(rdev->me_fw);
2101 		if (err) {
2102 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2103 			       fw_name);
2104 			goto out;
2105 		} else {
2106 			new_fw++;
2107 		}
2108 	}
2109 
2110 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2111 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2112 	if (err) {
2113 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2114 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2115 		if (err)
2116 			goto out;
2117 		if (rdev->ce_fw->size != ce_req_size) {
2118 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2119 			       rdev->ce_fw->size, fw_name);
2120			err = -EINVAL;
			goto out;
2121		}
2122 	} else {
2123 		err = radeon_ucode_validate(rdev->ce_fw);
2124 		if (err) {
2125 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2126 			       fw_name);
2127 			goto out;
2128 		} else {
2129 			new_fw++;
2130 		}
2131 	}
2132 
2133 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2134 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2135 	if (err) {
2136 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2137 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2138 		if (err)
2139 			goto out;
2140 		if (rdev->mec_fw->size != mec_req_size) {
2141 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2142 			       rdev->mec_fw->size, fw_name);
2143			err = -EINVAL;
			goto out;
2144		}
2145 	} else {
2146 		err = radeon_ucode_validate(rdev->mec_fw);
2147 		if (err) {
2148 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2149 			       fw_name);
2150 			goto out;
2151 		} else {
2152 			new_fw++;
2153 		}
2154 	}
2155 
2156 	if (rdev->family == CHIP_KAVERI) {
2157 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2158 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2159 		if (err) {
2160 			goto out;
2161 		} else {
2162 			err = radeon_ucode_validate(rdev->mec2_fw);
2163 			if (err) {
2164 				goto out;
2165 			} else {
2166 				new_fw++;
2167 			}
2168 		}
2169 	}
2170 
2171 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2172 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2173 	if (err) {
2174 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2175 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2176 		if (err)
2177 			goto out;
2178 		if (rdev->rlc_fw->size != rlc_req_size) {
2179 			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2180 			       rdev->rlc_fw->size, fw_name);
2181			err = -EINVAL;
			goto out;
2182		}
2183 	} else {
2184 		err = radeon_ucode_validate(rdev->rlc_fw);
2185 		if (err) {
2186 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2187 			       fw_name);
2188 			goto out;
2189 		} else {
2190 			new_fw++;
2191 		}
2192 	}
2193 
2194 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2195 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2196 	if (err) {
2197 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2198 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2199 		if (err)
2200 			goto out;
2201 		if (rdev->sdma_fw->size != sdma_req_size) {
2202 			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2203 			       rdev->sdma_fw->size, fw_name);
2204			err = -EINVAL;
			goto out;
2205		}
2206 	} else {
2207 		err = radeon_ucode_validate(rdev->sdma_fw);
2208 		if (err) {
2209 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2210 			       fw_name);
2211 			goto out;
2212 		} else {
2213 			new_fw++;
2214 		}
2215 	}
2216 
2217 	/* No SMC, MC ucode on APUs */
2218 	if (!(rdev->flags & RADEON_IS_IGP)) {
2219 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2220 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2221 		if (err) {
2222 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2223 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2224 			if (err) {
2225 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2226 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2227 				if (err)
2228 					goto out;
2229 			}
2230 			if ((rdev->mc_fw->size != mc_req_size) &&
2231			    (rdev->mc_fw->size != mc2_req_size)) {
2232				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2233				       rdev->mc_fw->size, fw_name);
2234				err = -EINVAL;
				goto out;
2235			}
2236 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2237 		} else {
2238 			err = radeon_ucode_validate(rdev->mc_fw);
2239 			if (err) {
2240 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2241 				       fw_name);
2242 				goto out;
2243 			} else {
2244 				new_fw++;
2245 			}
2246 		}
2247 
2248 		if (new_smc)
2249 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2250 		else
2251 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2252 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2253 		if (err) {
2254 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2255 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2256 			if (err) {
2257 				pr_err("smc: error loading firmware \"%s\"\n",
2258 				       fw_name);
2259 				release_firmware(rdev->smc_fw);
2260 				rdev->smc_fw = NULL;
2261 				err = 0;
2262 			} else if (rdev->smc_fw->size != smc_req_size) {
2263 				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2264 				       rdev->smc_fw->size, fw_name);
2265 				err = -EINVAL;
2266 			}
2267 		} else {
2268 			err = radeon_ucode_validate(rdev->smc_fw);
2269 			if (err) {
2270 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2271 				       fw_name);
2272 				goto out;
2273 			} else {
2274 				new_fw++;
2275 			}
2276 		}
2277 	}
2278 
2279 	if (new_fw == 0) {
2280 		rdev->new_fw = false;
2281 	} else if (new_fw < num_fw) {
2282 		pr_err("ci_fw: mixing new and old firmware!\n");
2283 		err = -EINVAL;
2284 	} else {
2285 		rdev->new_fw = true;
2286 	}
2287 
2288 out:
2289 	if (err) {
2290 		if (err != -EINVAL)
2291 			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2292 			       fw_name);
2293 		release_firmware(rdev->pfp_fw);
2294 		rdev->pfp_fw = NULL;
2295 		release_firmware(rdev->me_fw);
2296 		rdev->me_fw = NULL;
2297 		release_firmware(rdev->ce_fw);
2298 		rdev->ce_fw = NULL;
2299 		release_firmware(rdev->mec_fw);
2300 		rdev->mec_fw = NULL;
2301 		release_firmware(rdev->mec2_fw);
2302 		rdev->mec2_fw = NULL;
2303 		release_firmware(rdev->rlc_fw);
2304 		rdev->rlc_fw = NULL;
2305 		release_firmware(rdev->sdma_fw);
2306 		rdev->sdma_fw = NULL;
2307 		release_firmware(rdev->mc_fw);
2308 		rdev->mc_fw = NULL;
2309 		release_firmware(rdev->smc_fw);
2310 		rdev->smc_fw = NULL;
2311 	}
2312 	return err;
2313 }
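
/*
 * Descriptive note on the lookup order above: each image is first
 * requested under its new lower-case name and checked with
 * radeon_ucode_validate(); on failure the legacy upper-case name is
 * tried with a plain size check instead.  Mixing validated new-style
 * and legacy images is rejected: new_fw must end up as either 0 or
 * num_fw.
 */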
2314 
2315 /*
2316  * Core functions
2317  */
2318 /**
2319  * cik_tiling_mode_table_init - init the hw tiling table
2320  *
2321  * @rdev: radeon_device pointer
2322  *
2323  * Starting with SI, the tiling setup is done globally in a
2324  * set of 32 tiling modes.  Rather than selecting each set of
2325  * parameters per surface as on older asics, we just select
2326  * which index in the tiling table we want to use, and the
2327  * surface uses those parameters (CIK).
2328  */
2329 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2330 {
2331 	u32 *tile = rdev->config.cik.tile_mode_array;
2332 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2333 	const u32 num_tile_mode_states =
2334 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2335 	const u32 num_secondary_tile_mode_states =
2336 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2337 	u32 reg_offset, split_equal_to_row_size;
2338 	u32 num_pipe_configs;
2339 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2340 		rdev->config.cik.max_shader_engines;
2341 
2342 	switch (rdev->config.cik.mem_row_size_in_kb) {
2343 	case 1:
2344 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2345 		break;
2346 	case 2:
2347 	default:
2348 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2349 		break;
2350 	case 4:
2351 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2352 		break;
2353 	}
2354 
2355 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2356 	if (num_pipe_configs > 8)
2357 		num_pipe_configs = 16;
2358 
2359 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2360 		tile[reg_offset] = 0;
2361 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2362 		macrotile[reg_offset] = 0;
2363 
2364	switch (num_pipe_configs) {
2365 	case 16:
2366 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2367 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2368 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2370 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2374 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2378 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2382 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 			   TILE_SPLIT(split_equal_to_row_size));
2386 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2387 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2389 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2390 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2393 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 			   TILE_SPLIT(split_equal_to_row_size));
2397 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2398 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2399 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2400 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2402 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2404 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2409 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2411 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2417 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2419 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2424 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2432 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 
2445 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448 			   NUM_BANKS(ADDR_SURF_16_BANK));
2449 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 			   NUM_BANKS(ADDR_SURF_16_BANK));
2453 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456 			   NUM_BANKS(ADDR_SURF_16_BANK));
2457 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460 			   NUM_BANKS(ADDR_SURF_16_BANK));
2461 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 			   NUM_BANKS(ADDR_SURF_8_BANK));
2465 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 			   NUM_BANKS(ADDR_SURF_4_BANK));
2469 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472 			   NUM_BANKS(ADDR_SURF_2_BANK));
2473 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476 			   NUM_BANKS(ADDR_SURF_16_BANK));
2477 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480 			   NUM_BANKS(ADDR_SURF_16_BANK));
2481 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484 			    NUM_BANKS(ADDR_SURF_16_BANK));
2485 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 			    NUM_BANKS(ADDR_SURF_8_BANK));
2489 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492 			    NUM_BANKS(ADDR_SURF_4_BANK));
2493 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496 			    NUM_BANKS(ADDR_SURF_2_BANK));
2497 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 			    NUM_BANKS(ADDR_SURF_2_BANK));
2501 
2502 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2503 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2504 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2505 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2506 		break;
2507 
2508 	case 8:
2509 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2511 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2513 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2517 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2521 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2525 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528 			   TILE_SPLIT(split_equal_to_row_size));
2529 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2533 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2536 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 			   TILE_SPLIT(split_equal_to_row_size));
2540 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2541 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2542 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2545 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2547 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2552 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2554 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2560 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2562 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2565 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2569 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2573 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2575 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2577 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587 
2588 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2590 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2591 				NUM_BANKS(ADDR_SURF_16_BANK));
2592 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2595 				NUM_BANKS(ADDR_SURF_16_BANK));
2596 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2598 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599 				NUM_BANKS(ADDR_SURF_16_BANK));
2600 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603 				NUM_BANKS(ADDR_SURF_16_BANK));
2604 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2607 				NUM_BANKS(ADDR_SURF_8_BANK));
2608 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611 				NUM_BANKS(ADDR_SURF_4_BANK));
2612 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615 				NUM_BANKS(ADDR_SURF_2_BANK));
2616 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2618 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619 				NUM_BANKS(ADDR_SURF_16_BANK));
2620 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2622 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623 				NUM_BANKS(ADDR_SURF_16_BANK));
2624 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627 				NUM_BANKS(ADDR_SURF_16_BANK));
2628 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631 				NUM_BANKS(ADDR_SURF_16_BANK));
2632 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635 				NUM_BANKS(ADDR_SURF_8_BANK));
2636 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639 				NUM_BANKS(ADDR_SURF_4_BANK));
2640 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643 				NUM_BANKS(ADDR_SURF_2_BANK));
2644 
2645 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2646 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2647 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2648 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2649 		break;
2650 
2651 	case 4:
2652 		if (num_rbs == 4) {
2653 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2655 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2657 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2661 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2665 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2669 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672 			   TILE_SPLIT(split_equal_to_row_size));
2673 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2676 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2677 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2680 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 			   TILE_SPLIT(split_equal_to_row_size));
2684 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2685 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2686 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2687 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2689 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2691 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2696 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2698 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2704 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2706 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2711 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2713 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2719 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2720 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2721 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2728 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731 
2732 		} else if (num_rbs < 4) {
2733 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2734 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2735 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2737 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2741 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2745 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2749 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752 			   TILE_SPLIT(split_equal_to_row_size));
2753 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2754 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2756 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2757 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2760 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			   TILE_SPLIT(split_equal_to_row_size));
2764 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2765 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2766 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2769 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2771 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2773 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2774 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2778 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2784 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2786 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2788 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2793 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2797 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2799 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2800 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2801 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2808 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 		}
2812 
2813 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816 				NUM_BANKS(ADDR_SURF_16_BANK));
2817 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2819 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820 				NUM_BANKS(ADDR_SURF_16_BANK));
2821 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2823 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2824 				NUM_BANKS(ADDR_SURF_16_BANK));
2825 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828 				NUM_BANKS(ADDR_SURF_16_BANK));
2829 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832 				NUM_BANKS(ADDR_SURF_16_BANK));
2833 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836 				NUM_BANKS(ADDR_SURF_8_BANK));
2837 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2840 				NUM_BANKS(ADDR_SURF_4_BANK));
2841 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2842 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2843 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844 				NUM_BANKS(ADDR_SURF_16_BANK));
2845 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2847 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848 				NUM_BANKS(ADDR_SURF_16_BANK));
2849 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852 				NUM_BANKS(ADDR_SURF_16_BANK));
2853 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2855 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856 				NUM_BANKS(ADDR_SURF_16_BANK));
2857 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860 				NUM_BANKS(ADDR_SURF_16_BANK));
2861 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864 				NUM_BANKS(ADDR_SURF_8_BANK));
2865 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2868 				NUM_BANKS(ADDR_SURF_4_BANK));
2869 
2870 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2871 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2872 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2873 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2874 		break;
2875 
2876 	case 2:
2877 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2879 			   PIPE_CONFIG(ADDR_SURF_P2) |
2880 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2881 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883 			   PIPE_CONFIG(ADDR_SURF_P2) |
2884 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2885 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887 			   PIPE_CONFIG(ADDR_SURF_P2) |
2888 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2889 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891 			   PIPE_CONFIG(ADDR_SURF_P2) |
2892 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2893 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895 			   PIPE_CONFIG(ADDR_SURF_P2) |
2896 			   TILE_SPLIT(split_equal_to_row_size));
2897 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898 			   PIPE_CONFIG(ADDR_SURF_P2) |
2899 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2901 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902 			   PIPE_CONFIG(ADDR_SURF_P2) |
2903 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2904 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 			   PIPE_CONFIG(ADDR_SURF_P2) |
2907 			   TILE_SPLIT(split_equal_to_row_size));
2908		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2909			   PIPE_CONFIG(ADDR_SURF_P2));
2910 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2911 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912 			   PIPE_CONFIG(ADDR_SURF_P2));
2913 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915 			    PIPE_CONFIG(ADDR_SURF_P2) |
2916 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919 			    PIPE_CONFIG(ADDR_SURF_P2) |
2920 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2922 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923 			    PIPE_CONFIG(ADDR_SURF_P2) |
2924 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926 			    PIPE_CONFIG(ADDR_SURF_P2) |
2927 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2928 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2930 			    PIPE_CONFIG(ADDR_SURF_P2) |
2931 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934 			    PIPE_CONFIG(ADDR_SURF_P2) |
2935 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2937 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938 			    PIPE_CONFIG(ADDR_SURF_P2) |
2939 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2942 			    PIPE_CONFIG(ADDR_SURF_P2));
2943 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2944 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2945 			    PIPE_CONFIG(ADDR_SURF_P2) |
2946 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949 			    PIPE_CONFIG(ADDR_SURF_P2) |
2950 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2952 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953 			    PIPE_CONFIG(ADDR_SURF_P2) |
2954 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 
2956 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2957 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959 				NUM_BANKS(ADDR_SURF_16_BANK));
2960 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2962 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963 				NUM_BANKS(ADDR_SURF_16_BANK));
2964 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967 				NUM_BANKS(ADDR_SURF_16_BANK));
2968 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2970 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 				NUM_BANKS(ADDR_SURF_16_BANK));
2972 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975 				NUM_BANKS(ADDR_SURF_16_BANK));
2976 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979 				NUM_BANKS(ADDR_SURF_16_BANK));
2980 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2983 				NUM_BANKS(ADDR_SURF_8_BANK));
2984 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2985 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2986 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987 				NUM_BANKS(ADDR_SURF_16_BANK));
2988 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 				NUM_BANKS(ADDR_SURF_16_BANK));
2992 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2993 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 				NUM_BANKS(ADDR_SURF_16_BANK));
2996 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2998 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999 				NUM_BANKS(ADDR_SURF_16_BANK));
3000 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 				NUM_BANKS(ADDR_SURF_16_BANK));
3004 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007 				NUM_BANKS(ADDR_SURF_16_BANK));
3008 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011 				NUM_BANKS(ADDR_SURF_8_BANK));
3012 
3013 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3014 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3015 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3016 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3017 		break;
3018 
3019 	default:
3020 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3021 	}
3022 }
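
/*
 * Illustrative consequence of the global table programmed above (a
 * sketch): a surface carries only a small tiling index, and the
 * hardware resolves GB_TILE_MODE0 + 4 * index to the parameters
 * written here; index 8, for example, selects the linear-aligned mode
 * in every pipe configuration.
 */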
3023 
3024 /**
3025  * cik_select_se_sh - select which SE, SH to address
3026  *
3027  * @rdev: radeon_device pointer
3028  * @se_num: shader engine to address
3029  * @sh_num: sh block to address
3030  *
3031  * Select which SE, SH combinations to address. Certain
3032  * registers are instanced per SE or SH.  0xffffffff means
3033  * broadcast to all SEs or SHs (CIK).
3034  */
3035 static void cik_select_se_sh(struct radeon_device *rdev,
3036 			     u32 se_num, u32 sh_num)
3037 {
3038 	u32 data = INSTANCE_BROADCAST_WRITES;
3039 
3040 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3041 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3042 	else if (se_num == 0xffffffff)
3043 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3044 	else if (sh_num == 0xffffffff)
3045 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3046 	else
3047 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3048 	WREG32(GRBM_GFX_INDEX, data);
3049 }
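
/*
 * Usage sketch: per-instance reads select one SE/SH and then restore
 * broadcast so later writes reach every instance (the pattern
 * cik_setup_rb() uses below):
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */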
3050 
3051 /**
3052  * cik_create_bitmask - create a bitmask
3053  *
3054  * @bit_width: length of the mask
3055  *
3056  * Create a variable-length bit mask (CIK).
3057  * Returns the bitmask.
3058  */
3059 static u32 cik_create_bitmask(u32 bit_width)
3060 {
3061 	u32 i, mask = 0;
3062 
3063 	for (i = 0; i < bit_width; i++) {
3064 		mask <<= 1;
3065 		mask |= 1;
3066 	}
3067 	return mask;
3068 }
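
/*
 * Equivalent closed form (for bit_width < 32):
 *
 *	mask = (1u << bit_width) - 1;
 *
 * The loop above also yields 0xffffffff for bit_width == 32 without
 * relying on an undefined 32-bit shift.
 */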
3069 
3070 /**
3071  * cik_get_rb_disabled - computes the mask of disabled RBs
3072  *
3073  * @rdev: radeon_device pointer
3074  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3075  * @sh_per_se: number of SH blocks per SE for the asic
3077  *
3078  * Calculates the bitmask of disabled RBs (CIK).
3079  * Returns the disabled RB bitmask.
3080  */
3081 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3082 			      u32 max_rb_num_per_se,
3083 			      u32 sh_per_se)
3084 {
3085 	u32 data, mask;
3086 
3087 	data = RREG32(CC_RB_BACKEND_DISABLE);
3088 	if (data & 1)
3089 		data &= BACKEND_DISABLE_MASK;
3090 	else
3091 		data = 0;
3092 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3093 
3094 	data >>= BACKEND_DISABLE_SHIFT;
3095 
3096 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3097 
3098 	return data & mask;
3099 }
3100 
3101 /**
3102  * cik_setup_rb - setup the RBs on the asic
3103  *
3104  * @rdev: radeon_device pointer
3105  * @se_num: number of SEs (shader engines) for the asic
3106  * @sh_per_se: number of SH blocks per SE for the asic
3107  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3108  *
3109  * Configures per-SE/SH RB registers (CIK).
3110  */
3111 static void cik_setup_rb(struct radeon_device *rdev,
3112 			 u32 se_num, u32 sh_per_se,
3113 			 u32 max_rb_num_per_se)
3114 {
3115 	int i, j;
3116 	u32 data, mask;
3117 	u32 disabled_rbs = 0;
3118 	u32 enabled_rbs = 0;
3119 
3120 	for (i = 0; i < se_num; i++) {
3121 		for (j = 0; j < sh_per_se; j++) {
3122 			cik_select_se_sh(rdev, i, j);
3123 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3124 			if (rdev->family == CHIP_HAWAII)
3125 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3126 			else
3127 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3128 		}
3129 	}
3130 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3131 
3132 	mask = 1;
3133 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3134 		if (!(disabled_rbs & mask))
3135 			enabled_rbs |= mask;
3136 		mask <<= 1;
3137 	}
3138 
3139 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3140 
3141 	for (i = 0; i < se_num; i++) {
3142 		cik_select_se_sh(rdev, i, 0xffffffff);
3143 		data = 0;
3144 		for (j = 0; j < sh_per_se; j++) {
3145 			switch (enabled_rbs & 3) {
3146 			case 0:
3147 				if (j == 0)
3148 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3149 				else
3150 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3151 				break;
3152 			case 1:
3153 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3154 				break;
3155 			case 2:
3156 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3157 				break;
3158 			case 3:
3159 			default:
3160 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3161 				break;
3162 			}
3163 			enabled_rbs >>= 2;
3164 		}
3165 		WREG32(PA_SC_RASTER_CONFIG, data);
3166 	}
3167 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3168 }
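
/*
 * Sketch (added for illustration; the helper name is hypothetical): the
 * enabled-RB loop above is equivalent to masking the complement of
 * disabled_rbs to the total RB count, e.g. 2 SEs with 2 RBs each and no
 * harvesting gives enabled_rbs == 0xf.
 */
static inline u32 cik_enabled_rb_mask(u32 disabled_rbs, u32 num_rbs)
{
	return ~disabled_rbs & (num_rbs < 32 ? (1U << num_rbs) - 1 : ~0U);
}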
3169 
3170 /**
3171  * cik_gpu_init - setup the 3D engine
3172  *
3173  * @rdev: radeon_device pointer
3174  *
3175  * Configures the 3D engine and tiling configuration
3176  * registers so that the 3D engine is usable.
3177  */
3178 static void cik_gpu_init(struct radeon_device *rdev)
3179 {
3180 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3181 	u32 mc_shared_chmap, mc_arb_ramcfg;
3182 	u32 hdp_host_path_cntl;
3183 	u32 tmp;
3184 	int i, j;
3185 
3186 	switch (rdev->family) {
3187 	case CHIP_BONAIRE:
3188 		rdev->config.cik.max_shader_engines = 2;
3189 		rdev->config.cik.max_tile_pipes = 4;
3190 		rdev->config.cik.max_cu_per_sh = 7;
3191 		rdev->config.cik.max_sh_per_se = 1;
3192 		rdev->config.cik.max_backends_per_se = 2;
3193 		rdev->config.cik.max_texture_channel_caches = 4;
3194 		rdev->config.cik.max_gprs = 256;
3195 		rdev->config.cik.max_gs_threads = 32;
3196 		rdev->config.cik.max_hw_contexts = 8;
3197 
3198 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3199 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3200 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3201 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3202 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3203 		break;
3204 	case CHIP_HAWAII:
3205 		rdev->config.cik.max_shader_engines = 4;
3206 		rdev->config.cik.max_tile_pipes = 16;
3207 		rdev->config.cik.max_cu_per_sh = 11;
3208 		rdev->config.cik.max_sh_per_se = 1;
3209 		rdev->config.cik.max_backends_per_se = 4;
3210 		rdev->config.cik.max_texture_channel_caches = 16;
3211 		rdev->config.cik.max_gprs = 256;
3212 		rdev->config.cik.max_gs_threads = 32;
3213 		rdev->config.cik.max_hw_contexts = 8;
3214 
3215 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3216 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3217 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3218 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3219 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3220 		break;
3221 	case CHIP_KAVERI:
3222 		rdev->config.cik.max_shader_engines = 1;
3223 		rdev->config.cik.max_tile_pipes = 4;
3224 		if ((rdev->pdev->device == 0x1304) ||
3225 		    (rdev->pdev->device == 0x1305) ||
3226 		    (rdev->pdev->device == 0x130C) ||
3227 		    (rdev->pdev->device == 0x130F) ||
3228 		    (rdev->pdev->device == 0x1310) ||
3229 		    (rdev->pdev->device == 0x1311) ||
3230 		    (rdev->pdev->device == 0x131C)) {
3231 			rdev->config.cik.max_cu_per_sh = 8;
3232 			rdev->config.cik.max_backends_per_se = 2;
3233 		} else if ((rdev->pdev->device == 0x1309) ||
3234 			   (rdev->pdev->device == 0x130A) ||
3235 			   (rdev->pdev->device == 0x130D) ||
3236 			   (rdev->pdev->device == 0x1313) ||
3237 			   (rdev->pdev->device == 0x131D)) {
3238 			rdev->config.cik.max_cu_per_sh = 6;
3239 			rdev->config.cik.max_backends_per_se = 2;
3240 		} else if ((rdev->pdev->device == 0x1306) ||
3241 			   (rdev->pdev->device == 0x1307) ||
3242 			   (rdev->pdev->device == 0x130B) ||
3243 			   (rdev->pdev->device == 0x130E) ||
3244 			   (rdev->pdev->device == 0x1315) ||
3245 			   (rdev->pdev->device == 0x1318) ||
3246 			   (rdev->pdev->device == 0x131B)) {
3247 			rdev->config.cik.max_cu_per_sh = 4;
3248 			rdev->config.cik.max_backends_per_se = 1;
3249 		} else {
3250 			rdev->config.cik.max_cu_per_sh = 3;
3251 			rdev->config.cik.max_backends_per_se = 1;
3252 		}
3253 		rdev->config.cik.max_sh_per_se = 1;
3254 		rdev->config.cik.max_texture_channel_caches = 4;
3255 		rdev->config.cik.max_gprs = 256;
3256 		rdev->config.cik.max_gs_threads = 16;
3257 		rdev->config.cik.max_hw_contexts = 8;
3258 
3259 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3260 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3261 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3262 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3263 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3264 		break;
3265 	case CHIP_KABINI:
3266 	case CHIP_MULLINS:
3267 	default:
3268 		rdev->config.cik.max_shader_engines = 1;
3269 		rdev->config.cik.max_tile_pipes = 2;
3270 		rdev->config.cik.max_cu_per_sh = 2;
3271 		rdev->config.cik.max_sh_per_se = 1;
3272 		rdev->config.cik.max_backends_per_se = 1;
3273 		rdev->config.cik.max_texture_channel_caches = 2;
3274 		rdev->config.cik.max_gprs = 256;
3275 		rdev->config.cik.max_gs_threads = 16;
3276 		rdev->config.cik.max_hw_contexts = 8;
3277 
3278 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3279 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3280 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3281 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3282 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3283 		break;
3284 	}
3285 
3286 	/* Initialize HDP */
3287 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3288 		WREG32((0x2c14 + j), 0x00000000);
3289 		WREG32((0x2c18 + j), 0x00000000);
3290 		WREG32((0x2c1c + j), 0x00000000);
3291 		WREG32((0x2c20 + j), 0x00000000);
3292 		WREG32((0x2c24 + j), 0x00000000);
3293 	}
3294 
3295 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3296 	WREG32(SRBM_INT_CNTL, 0x1);
3297 	WREG32(SRBM_INT_ACK, 0x1);
3298 
3299 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3300 
3301 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3302 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3303 
3304 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3305 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3306 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3307 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3308 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3309 		rdev->config.cik.mem_row_size_in_kb = 4;
3310 	/* XXX use MC settings? */
3311 	rdev->config.cik.shader_engine_tile_size = 32;
3312 	rdev->config.cik.num_gpus = 1;
3313 	rdev->config.cik.multi_gpu_tile_size = 64;
3314 
3315 	/* fix up row size */
3316 	gb_addr_config &= ~ROW_SIZE_MASK;
3317 	switch (rdev->config.cik.mem_row_size_in_kb) {
3318 	case 1:
3319 	default:
3320 		gb_addr_config |= ROW_SIZE(0);
3321 		break;
3322 	case 2:
3323 		gb_addr_config |= ROW_SIZE(1);
3324 		break;
3325 	case 4:
3326 		gb_addr_config |= ROW_SIZE(2);
3327 		break;
3328 	}
3329 
3330 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3331 	 * not have bank info, so create a custom tiling dword.
3332 	 * bits 3:0   num_pipes
3333 	 * bits 7:4   num_banks
3334 	 * bits 11:8  group_size
3335 	 * bits 15:12 row_size
3336 	 */
3337 	rdev->config.cik.tile_config = 0;
3338 	switch (rdev->config.cik.num_tile_pipes) {
3339 	case 1:
3340 		rdev->config.cik.tile_config |= (0 << 0);
3341 		break;
3342 	case 2:
3343 		rdev->config.cik.tile_config |= (1 << 0);
3344 		break;
3345 	case 4:
3346 		rdev->config.cik.tile_config |= (2 << 0);
3347 		break;
3348 	case 8:
3349 	default:
3350 		/* XXX what about 12? */
3351 		rdev->config.cik.tile_config |= (3 << 0);
3352 		break;
3353 	}
3354 	rdev->config.cik.tile_config |=
3355 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3356 	rdev->config.cik.tile_config |=
3357 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3358 	rdev->config.cik.tile_config |=
3359 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3360 
3361 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3362 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3363 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3364 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3365 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3366 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3367 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3368 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3369 
3370 	cik_tiling_mode_table_init(rdev);
3371 
3372 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3373 		     rdev->config.cik.max_sh_per_se,
3374 		     rdev->config.cik.max_backends_per_se);
3375 
3376 	rdev->config.cik.active_cus = 0;
3377 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3378 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3379 			rdev->config.cik.active_cus +=
3380 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3381 		}
3382 	}
3383 
3384 	/* set HW defaults for 3D engine */
3385 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3386 
3387 	WREG32(SX_DEBUG_1, 0x20);
3388 
3389 	WREG32(TA_CNTL_AUX, 0x00010000);
3390 
3391 	tmp = RREG32(SPI_CONFIG_CNTL);
3392 	tmp |= 0x03000000;
3393 	WREG32(SPI_CONFIG_CNTL, tmp);
3394 
3395 	WREG32(SQ_CONFIG, 1);
3396 
3397 	WREG32(DB_DEBUG, 0);
3398 
3399 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3400 	tmp |= 0x00000400;
3401 	WREG32(DB_DEBUG2, tmp);
3402 
3403 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3404 	tmp |= 0x00020200;
3405 	WREG32(DB_DEBUG3, tmp);
3406 
3407 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3408 	tmp |= 0x00018208;
3409 	WREG32(CB_HW_CONTROL, tmp);
3410 
3411 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3412 
3413 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3414 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3415 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3416 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3417 
3418 	WREG32(VGT_NUM_INSTANCES, 1);
3419 
3420 	WREG32(CP_PERFMON_CNTL, 0);
3421 
3422 	WREG32(SQ_CONFIG, 0);
3423 
3424 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3425 					  FORCE_EOV_MAX_REZ_CNT(255)));
3426 
3427 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3428 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3429 
3430 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3431 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3432 
3433 	tmp = RREG32(HDP_MISC_CNTL);
3434 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3435 	WREG32(HDP_MISC_CNTL, tmp);
3436 
3437 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3438 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3439 
3440 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3441 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3442 
3443 	udelay(50);
3444 }
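
/*
 * Illustrative decoder (hypothetical helper, not used by the driver) for
 * the custom tiling dword assembled in cik_gpu_init() above, following
 * the bit layout documented there: bits 3:0 num_pipes, 7:4 num_banks,
 * 11:8 group_size, 15:12 row_size.
 */
static inline void cik_decode_tile_config(u32 tile_config, u32 *num_pipes,
					  u32 *num_banks, u32 *group_size,
					  u32 *row_size)
{
	*num_pipes = (tile_config >> 0) & 0xf;	/* encoded: 0=1, 1=2, 2=4, 3=8 */
	*num_banks = (tile_config >> 4) & 0xf;
	*group_size = (tile_config >> 8) & 0xf;
	*row_size = (tile_config >> 12) & 0xf;
}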
3445 
3446 /*
3447  * GPU scratch registers helper functions.
3448  */
3449 /**
3450  * cik_scratch_init - setup driver info for CP scratch regs
3451  *
3452  * @rdev: radeon_device pointer
3453  *
3454  * Set up the number and offset of the CP scratch registers.
3455  * NOTE: use of CP scratch registers is a legacy interface and
3456  * is not used by default on newer asics (r6xx+).  On newer asics,
3457  * memory buffers are used for fences rather than scratch regs.
3458  */
3459 static void cik_scratch_init(struct radeon_device *rdev)
3460 {
3461 	int i;
3462 
3463 	rdev->scratch.num_reg = 7;
3464 	rdev->scratch.reg_base = SCRATCH_REG0;
3465 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3466 		rdev->scratch.free[i] = true;
3467 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3468 	}
3469 }
3470 
3471 /**
3472  * cik_ring_test - basic gfx ring test
3473  *
3474  * @rdev: radeon_device pointer
3475  * @ring: radeon_ring structure holding ring information
3476  *
3477  * Allocate a scratch register and write to it using the gfx ring (CIK).
3478  * Provides a basic gfx ring test to verify that the ring is working.
3479  * Used by cik_cp_gfx_resume().
3480  * Returns 0 on success, error on failure.
3481  */
3482 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3483 {
3484 	uint32_t scratch;
3485 	uint32_t tmp = 0;
3486 	unsigned i;
3487 	int r;
3488 
3489 	r = radeon_scratch_get(rdev, &scratch);
3490 	if (r) {
3491 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3492 		return r;
3493 	}
3494 	WREG32(scratch, 0xCAFEDEAD);
3495 	r = radeon_ring_lock(rdev, ring, 3);
3496 	if (r) {
3497 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3498 		radeon_scratch_free(rdev, scratch);
3499 		return r;
3500 	}
3501 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3502 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3503 	radeon_ring_write(ring, 0xDEADBEEF);
3504 	radeon_ring_unlock_commit(rdev, ring, false);
3505 
3506 	for (i = 0; i < rdev->usec_timeout; i++) {
3507 		tmp = RREG32(scratch);
3508 		if (tmp == 0xDEADBEEF)
3509 			break;
3510 		DRM_UDELAY(1);
3511 	}
3512 	if (i < rdev->usec_timeout) {
3513 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3514 	} else {
3515 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3516 			  ring->idx, scratch, tmp);
3517 		r = -EINVAL;
3518 	}
3519 	radeon_scratch_free(rdev, scratch);
3520 	return r;
3521 }
3522 
3523 /**
3524  * cik_hdp_flush_cp_ring_emit - emit an HDP flush on the CP
3525  *
3526  * @rdev: radeon_device pointer
3527  * @ridx: radeon ring index
3528  *
3529  * Emits an HDP flush on the CP.
3530  */
3531 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3532 				       int ridx)
3533 {
3534 	struct radeon_ring *ring = &rdev->ring[ridx];
3535 	u32 ref_and_mask;
3536 
3537 	switch (ring->idx) {
3538 	case CAYMAN_RING_TYPE_CP1_INDEX:
3539 	case CAYMAN_RING_TYPE_CP2_INDEX:
3540 	default:
3541 		switch (ring->me) {
3542 		case 0:
3543 			ref_and_mask = CP2 << ring->pipe;
3544 			break;
3545 		case 1:
3546 			ref_and_mask = CP6 << ring->pipe;
3547 			break;
3548 		default:
3549 			return;
3550 		}
3551 		break;
3552 	case RADEON_RING_TYPE_GFX_INDEX:
3553 		ref_and_mask = CP0;
3554 		break;
3555 	}
3556 
3557 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3558 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3559 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3560 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3561 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3562 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3563 	radeon_ring_write(ring, ref_and_mask);
3564 	radeon_ring_write(ring, ref_and_mask);
3565 	radeon_ring_write(ring, 0x20); /* poll interval */
3566 }
3567 
3568 /**
3569  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3570  *
3571  * @rdev: radeon_device pointer
3572  * @fence: radeon fence object
3573  *
3574  * Emits a fence sequence number on the gfx ring and flushes
3575  * GPU caches.
3576  */
3577 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3578 			     struct radeon_fence *fence)
3579 {
3580 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3581 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3582 
3583 	/* Workaround for cache flush problems. First send a dummy EOP
3584 	 * event down the pipe with a sequence number one below the real one.
3585 	 */
3586 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3587 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3588 				 EOP_TC_ACTION_EN |
3589 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3590 				 EVENT_INDEX(5)));
3591 	radeon_ring_write(ring, addr & 0xfffffffc);
3592 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3593 				DATA_SEL(1) | INT_SEL(0));
3594 	radeon_ring_write(ring, fence->seq - 1);
3595 	radeon_ring_write(ring, 0);
3596 
3597 	/* Then send the real EOP event down the pipe. */
3598 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3599 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3600 				 EOP_TC_ACTION_EN |
3601 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3602 				 EVENT_INDEX(5)));
3603 	radeon_ring_write(ring, addr & 0xfffffffc);
3604 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3605 	radeon_ring_write(ring, fence->seq);
3606 	radeon_ring_write(ring, 0);
3607 }
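
/*
 * Ring-space note (editorial, with a hypothetical convenience macro):
 * each EVENT_WRITE_EOP packet above is 6 dwords (header plus 5 payload),
 * so the dummy-plus-real pair emitted per gfx fence costs 12 dwords.
 */
#define CIK_GFX_FENCE_EMIT_DWORDS	(2 * 6)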
3608 
3609 /**
3610  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3611  *
3612  * @rdev: radeon_device pointer
3613  * @fence: radeon fence object
3614  *
3615  * Emits a fence sequence number on the compute ring and flushes
3616  * GPU caches.
3617  */
3618 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3619 				 struct radeon_fence *fence)
3620 {
3621 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3622 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3623 
3624 	/* RELEASE_MEM - flush caches, send int */
3625 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3626 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3627 				 EOP_TC_ACTION_EN |
3628 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3629 				 EVENT_INDEX(5)));
3630 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3631 	radeon_ring_write(ring, addr & 0xfffffffc);
3632 	radeon_ring_write(ring, upper_32_bits(addr));
3633 	radeon_ring_write(ring, fence->seq);
3634 	radeon_ring_write(ring, 0);
3635 }
3636 
3637 /**
3638  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3639  *
3640  * @rdev: radeon_device pointer
3641  * @ring: radeon ring buffer object
3642  * @semaphore: radeon semaphore object
3643  * @emit_wait: Is this a semaphore wait?
3644  *
3645  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3646  * from running ahead of semaphore waits.
3647  */
3648 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3649 			     struct radeon_ring *ring,
3650 			     struct radeon_semaphore *semaphore,
3651 			     bool emit_wait)
3652 {
3653 	uint64_t addr = semaphore->gpu_addr;
3654 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3655 
3656 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3657 	radeon_ring_write(ring, lower_32_bits(addr));
3658 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3659 
3660 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3661 		/* Prevent the PFP from running ahead of the semaphore wait */
3662 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3663 		radeon_ring_write(ring, 0x0);
3664 	}
3665 
3666 	return true;
3667 }
3668 
3669 /**
3670  * cik_copy_cpdma - copy pages using the CP DMA engine
3671  *
3672  * @rdev: radeon_device pointer
3673  * @src_offset: src GPU address
3674  * @dst_offset: dst GPU address
3675  * @num_gpu_pages: number of GPU pages to xfer
3676  * @resv: reservation object to sync to
3677  *
3678  * Copy GPU pages using the CP DMA engine (CIK+).
3679  * Used by the radeon ttm implementation to move pages if
3680  * registered as the asic copy callback.
3681  */
3682 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3683 				    uint64_t src_offset, uint64_t dst_offset,
3684 				    unsigned num_gpu_pages,
3685 				    struct reservation_object *resv)
3686 {
3687 	struct radeon_fence *fence;
3688 	struct radeon_sync sync;
3689 	int ring_index = rdev->asic->copy.blit_ring_index;
3690 	struct radeon_ring *ring = &rdev->ring[ring_index];
3691 	u32 size_in_bytes, cur_size_in_bytes, control;
3692 	int i, num_loops;
3693 	int r = 0;
3694 
3695 	radeon_sync_create(&sync);
3696 
3697 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3698 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3699 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3700 	if (r) {
3701 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3702 		radeon_sync_free(rdev, &sync, NULL);
3703 		return ERR_PTR(r);
3704 	}
3705 
3706 	radeon_sync_resv(rdev, &sync, resv, false);
3707 	radeon_sync_rings(rdev, &sync, ring->idx);
3708 
3709 	for (i = 0; i < num_loops; i++) {
3710 		cur_size_in_bytes = size_in_bytes;
3711 		if (cur_size_in_bytes > 0x1fffff)
3712 			cur_size_in_bytes = 0x1fffff;
3713 		size_in_bytes -= cur_size_in_bytes;
3714 		control = 0;
3715 		if (size_in_bytes == 0)
3716 			control |= PACKET3_DMA_DATA_CP_SYNC;
3717 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3718 		radeon_ring_write(ring, control);
3719 		radeon_ring_write(ring, lower_32_bits(src_offset));
3720 		radeon_ring_write(ring, upper_32_bits(src_offset));
3721 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3722 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3723 		radeon_ring_write(ring, cur_size_in_bytes);
3724 		src_offset += cur_size_in_bytes;
3725 		dst_offset += cur_size_in_bytes;
3726 	}
3727 
3728 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3729 	if (r) {
3730 		radeon_ring_unlock_undo(rdev, ring);
3731 		radeon_sync_free(rdev, &sync, NULL);
3732 		return ERR_PTR(r);
3733 	}
3734 
3735 	radeon_ring_unlock_commit(rdev, ring, false);
3736 	radeon_sync_free(rdev, &sync, fence);
3737 
3738 	return fence;
3739 }
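
/*
 * Worked example (illustrative; the helper is hypothetical): DMA_DATA
 * moves at most 0x1fffff bytes per packet and each packet costs 7 ring
 * dwords, which is where the num_loops * 7 + 18 reservation above comes
 * from (that the remaining 18 dwords cover sync and fence emission is an
 * assumption based on the surrounding code).
 */
static inline unsigned cik_cpdma_num_loops(unsigned num_gpu_pages)
{
	unsigned size_in_bytes = num_gpu_pages << RADEON_GPU_PAGE_SHIFT;

	return DIV_ROUND_UP(size_in_bytes, 0x1fffff);
}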
3740 
3741 /*
3742  * IB stuff
3743  */
3744 /**
3745  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3746  *
3747  * @rdev: radeon_device pointer
3748  * @ib: radeon indirect buffer object
3749  *
3750  * Emits a DE (drawing engine) or CE (constant engine) IB
3751  * on the gfx ring.  IBs are usually generated by userspace
3752  * acceleration drivers and submitted to the kernel for
3753  * scheduling on the ring.  This function schedules the IB
3754  * on the gfx ring for execution by the GPU.
3755  */
3756 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3757 {
3758 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3759 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3760 	u32 header, control = INDIRECT_BUFFER_VALID;
3761 
3762 	if (ib->is_const_ib) {
3763 		/* set switch buffer packet before const IB */
3764 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3765 		radeon_ring_write(ring, 0);
3766 
3767 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3768 	} else {
3769 		u32 next_rptr;
3770 		if (ring->rptr_save_reg) {
3771 			next_rptr = ring->wptr + 3 + 4;
3772 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3773 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3774 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3775 			radeon_ring_write(ring, next_rptr);
3776 		} else if (rdev->wb.enabled) {
3777 			next_rptr = ring->wptr + 5 + 4;
3778 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3779 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3780 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3781 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3782 			radeon_ring_write(ring, next_rptr);
3783 		}
3784 
3785 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3786 	}
3787 
3788 	control |= ib->length_dw | (vm_id << 24);
3789 
3790 	radeon_ring_write(ring, header);
3791 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3792 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3793 	radeon_ring_write(ring, control);
3794 }
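
/*
 * Sketch (hypothetical helper): layout of the INDIRECT_BUFFER control
 * dword built above -- IB length in dwords in the low bits, the VM id at
 * bit 24, plus the VALID flag.
 */
static inline u32 cik_ib_control_dword(u32 length_dw, unsigned int vm_id)
{
	return INDIRECT_BUFFER_VALID | length_dw | ((u32)vm_id << 24);
}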
3795 
3796 /**
3797  * cik_ib_test - basic gfx ring IB test
3798  *
3799  * @rdev: radeon_device pointer
3800  * @ring: radeon_ring structure holding ring information
3801  *
3802  * Allocate an IB and execute it on the gfx ring (CIK).
3803  * Provides a basic gfx ring test to verify that IBs are working.
3804  * Returns 0 on success, error on failure.
3805  */
3806 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3807 {
3808 	struct radeon_ib ib;
3809 	uint32_t scratch;
3810 	uint32_t tmp = 0;
3811 	unsigned i;
3812 	int r;
3813 
3814 	r = radeon_scratch_get(rdev, &scratch);
3815 	if (r) {
3816 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3817 		return r;
3818 	}
3819 	WREG32(scratch, 0xCAFEDEAD);
3820 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3821 	if (r) {
3822 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3823 		radeon_scratch_free(rdev, scratch);
3824 		return r;
3825 	}
3826 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3827 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3828 	ib.ptr[2] = 0xDEADBEEF;
3829 	ib.length_dw = 3;
3830 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3831 	if (r) {
3832 		radeon_scratch_free(rdev, scratch);
3833 		radeon_ib_free(rdev, &ib);
3834 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3835 		return r;
3836 	}
3837 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3838 		RADEON_USEC_IB_TEST_TIMEOUT));
3839 	if (r < 0) {
3840 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3841 		radeon_scratch_free(rdev, scratch);
3842 		radeon_ib_free(rdev, &ib);
3843 		return r;
3844 	} else if (r == 0) {
3845 		DRM_ERROR("radeon: fence wait timed out.\n");
3846 		radeon_scratch_free(rdev, scratch);
3847 		radeon_ib_free(rdev, &ib);
3848 		return -ETIMEDOUT;
3849 	}
3850 	r = 0;
3851 	for (i = 0; i < rdev->usec_timeout; i++) {
3852 		tmp = RREG32(scratch);
3853 		if (tmp == 0xDEADBEEF)
3854 			break;
3855 		DRM_UDELAY(1);
3856 	}
3857 	if (i < rdev->usec_timeout) {
3858 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3859 	} else {
3860 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3861 			  scratch, tmp);
3862 		r = -EINVAL;
3863 	}
3864 	radeon_scratch_free(rdev, scratch);
3865 	radeon_ib_free(rdev, &ib);
3866 	return r;
3867 }
3868 
3869 /*
3870  * CP.
3871  * On CIK, gfx and compute now have independent command processors.
3872  *
3873  * GFX
3874  * Gfx consists of a single ring and can process both gfx jobs and
3875  * compute jobs.  The gfx CP consists of three microengines (ME):
3876  * PFP - Pre-Fetch Parser
3877  * ME - Micro Engine
3878  * CE - Constant Engine
3879  * The PFP and ME make up what is considered the Drawing Engine (DE).
3880  * The CE is an asynchronous engine used for updating buffer descriptors
3881  * used by the DE so that they can be loaded into cache in parallel
3882  * while the DE is processing state update packets.
3883  *
3884  * Compute
3885  * The compute CP consists of two microengines (ME):
3886  * MEC1 - Compute MicroEngine 1
3887  * MEC2 - Compute MicroEngine 2
3888  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3889  * The queues are exposed to userspace and are programmed directly
3890  * by the compute runtime.
3891  */
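
/*
 * Worked example (editorial; the helper is hypothetical and the driver
 * open-codes this mapping in cik_cp_compute_resume()): how a linear
 * compute queue index maps onto the topology described above, i.e.
 * 8 queues per pipe, 4 pipes per MEC, with MEC1 addressed as me == 1.
 */
static inline void cik_queue_id_to_mec(u32 queue_id, u32 *me, u32 *pipe,
				       u32 *queue)
{
	*queue = queue_id % 8;
	*pipe = (queue_id / 8) % 4;
	*me = (queue_id / 32) + 1;
}
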
3892 /**
3893  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3894  *
3895  * @rdev: radeon_device pointer
3896  * @enable: enable or disable the MEs
3897  *
3898  * Halts or unhalts the gfx MEs.
3899  */
3900 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3901 {
3902 	if (enable)
3903 		WREG32(CP_ME_CNTL, 0);
3904 	else {
3905 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3906 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3907 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3908 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3909 	}
3910 	udelay(50);
3911 }
3912 
3913 /**
3914  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3915  *
3916  * @rdev: radeon_device pointer
3917  *
3918  * Loads the gfx PFP, ME, and CE ucode.
3919  * Returns 0 for success, -EINVAL if the ucode is not available.
3920  */
3921 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3922 {
3923 	int i;
3924 
3925 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3926 		return -EINVAL;
3927 
3928 	cik_cp_gfx_enable(rdev, false);
3929 
3930 	if (rdev->new_fw) {
3931 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3932 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3933 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3934 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3935 		const struct gfx_firmware_header_v1_0 *me_hdr =
3936 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3937 		const __le32 *fw_data;
3938 		u32 fw_size;
3939 
3940 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3941 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3942 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3943 
3944 		/* PFP */
3945 		fw_data = (const __le32 *)
3946 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3947 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3948 		WREG32(CP_PFP_UCODE_ADDR, 0);
3949 		for (i = 0; i < fw_size; i++)
3950 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3951 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3952 
3953 		/* CE */
3954 		fw_data = (const __le32 *)
3955 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3956 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3957 		WREG32(CP_CE_UCODE_ADDR, 0);
3958 		for (i = 0; i < fw_size; i++)
3959 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3960 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3961 
3962 		/* ME */
3963 		fw_data = (const __le32 *)
3964 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3965 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3966 		WREG32(CP_ME_RAM_WADDR, 0);
3967 		for (i = 0; i < fw_size; i++)
3968 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3969 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3970 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3971 	} else {
3972 		const __be32 *fw_data;
3973 
3974 		/* PFP */
3975 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3976 		WREG32(CP_PFP_UCODE_ADDR, 0);
3977 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3978 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3979 		WREG32(CP_PFP_UCODE_ADDR, 0);
3980 
3981 		/* CE */
3982 		fw_data = (const __be32 *)rdev->ce_fw->data;
3983 		WREG32(CP_CE_UCODE_ADDR, 0);
3984 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3985 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3986 		WREG32(CP_CE_UCODE_ADDR, 0);
3987 
3988 		/* ME */
3989 		fw_data = (const __be32 *)rdev->me_fw->data;
3990 		WREG32(CP_ME_RAM_WADDR, 0);
3991 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3992 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3993 		WREG32(CP_ME_RAM_WADDR, 0);
3994 	}
3995 
3996 	return 0;
3997 }
3998 
3999 /**
4000  * cik_cp_gfx_start - start the gfx ring
4001  *
4002  * @rdev: radeon_device pointer
4003  *
4004  * Enables the ring and loads the clear state context and other
4005  * packets required to init the ring.
4006  * Returns 0 for success, error for failure.
4007  */
4008 static int cik_cp_gfx_start(struct radeon_device *rdev)
4009 {
4010 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4011 	int r, i;
4012 
4013 	/* init the CP */
4014 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4015 	WREG32(CP_ENDIAN_SWAP, 0);
4016 	WREG32(CP_DEVICE_ID, 1);
4017 
4018 	cik_cp_gfx_enable(rdev, true);
4019 
4020 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4021 	if (r) {
4022 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4023 		return r;
4024 	}
4025 
4026 	/* init the CE partitions.  CE only used for gfx on CIK */
4027 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4028 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4029 	radeon_ring_write(ring, 0x8000);
4030 	radeon_ring_write(ring, 0x8000);
4031 
4032 	/* setup clear context state */
4033 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4034 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4035 
4036 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4037 	radeon_ring_write(ring, 0x80000000);
4038 	radeon_ring_write(ring, 0x80000000);
4039 
4040 	for (i = 0; i < cik_default_size; i++)
4041 		radeon_ring_write(ring, cik_default_state[i]);
4042 
4043 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4044 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4045 
4046 	/* set clear context state */
4047 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4048 	radeon_ring_write(ring, 0);
4049 
4050 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4051 	radeon_ring_write(ring, 0x00000316);
4052 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4053 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4054 
4055 	radeon_ring_unlock_commit(rdev, ring, false);
4056 
4057 	return 0;
4058 }
4059 
4060 /**
4061  * cik_cp_gfx_fini - stop the gfx ring
4062  *
4063  * @rdev: radeon_device pointer
4064  *
4065  * Stop the gfx ring and tear down the driver ring
4066  * info.
4067  */
4068 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4069 {
4070 	cik_cp_gfx_enable(rdev, false);
4071 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4072 }
4073 
4074 /**
4075  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4076  *
4077  * @rdev: radeon_device pointer
4078  *
4079  * Program the location and size of the gfx ring buffer
4080  * and test it to make sure it's working.
4081  * Returns 0 for success, error for failure.
4082  */
4083 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4084 {
4085 	struct radeon_ring *ring;
4086 	u32 tmp;
4087 	u32 rb_bufsz;
4088 	u64 rb_addr;
4089 	int r;
4090 
4091 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4092 	if (rdev->family != CHIP_HAWAII)
4093 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4094 
4095 	/* Set the write pointer delay */
4096 	WREG32(CP_RB_WPTR_DELAY, 0);
4097 
4098 	/* set the RB to use vmid 0 */
4099 	WREG32(CP_RB_VMID, 0);
4100 
4101 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4102 
4103 	/* ring 0 - compute and gfx */
4104 	/* Set ring buffer size */
4105 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4106 	rb_bufsz = order_base_2(ring->ring_size / 8);
4107 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4108 #ifdef __BIG_ENDIAN
4109 	tmp |= BUF_SWAP_32BIT;
4110 #endif
4111 	WREG32(CP_RB0_CNTL, tmp);
4112 
4113 	/* Initialize the ring buffer's read and write pointers */
4114 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4115 	ring->wptr = 0;
4116 	WREG32(CP_RB0_WPTR, ring->wptr);
4117 
4118 	/* set the wb address whether it's enabled or not */
4119 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4120 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4121 
4122 	/* scratch register shadowing is no longer supported */
4123 	WREG32(SCRATCH_UMSK, 0);
4124 
4125 	if (!rdev->wb.enabled)
4126 		tmp |= RB_NO_UPDATE;
4127 
4128 	mdelay(1);
4129 	WREG32(CP_RB0_CNTL, tmp);
4130 
4131 	rb_addr = ring->gpu_addr >> 8;
4132 	WREG32(CP_RB0_BASE, rb_addr);
4133 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4134 
4135 	/* start the ring */
4136 	cik_cp_gfx_start(rdev);
4137 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4138 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4139 	if (r) {
4140 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4141 		return r;
4142 	}
4143 
4144 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4145 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4146 
4147 	return 0;
4148 }
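
/*
 * Sizing note (illustrative; hypothetical helper): the ring size field
 * programmed into CP_RB0_CNTL above is log2 of the ring size in 8-byte
 * units, so a 1 MB ring yields order_base_2(1048576 / 8) == 17.
 */
static inline u32 cik_rb_bufsz(u32 ring_size_bytes)
{
	return order_base_2(ring_size_bytes / 8);
}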
4149 
4150 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4151 		     struct radeon_ring *ring)
4152 {
4153 	u32 rptr;
4154 
4155 	if (rdev->wb.enabled)
4156 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4157 	else
4158 		rptr = RREG32(CP_RB0_RPTR);
4159 
4160 	return rptr;
4161 }
4162 
4163 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4164 		     struct radeon_ring *ring)
4165 {
4166 	return RREG32(CP_RB0_WPTR);
4167 }
4168 
4169 void cik_gfx_set_wptr(struct radeon_device *rdev,
4170 		      struct radeon_ring *ring)
4171 {
4172 	WREG32(CP_RB0_WPTR, ring->wptr);
4173 	(void)RREG32(CP_RB0_WPTR);
4174 }
4175 
4176 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4177 			 struct radeon_ring *ring)
4178 {
4179 	u32 rptr;
4180 
4181 	if (rdev->wb.enabled) {
4182 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4183 	} else {
4184 		mutex_lock(&rdev->srbm_mutex);
4185 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4186 		rptr = RREG32(CP_HQD_PQ_RPTR);
4187 		cik_srbm_select(rdev, 0, 0, 0, 0);
4188 		mutex_unlock(&rdev->srbm_mutex);
4189 	}
4190 
4191 	return rptr;
4192 }
4193 
4194 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4195 			 struct radeon_ring *ring)
4196 {
4197 	u32 wptr;
4198 
4199 	if (rdev->wb.enabled) {
4200 		/* XXX check if swapping is necessary on BE */
4201 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4202 	} else {
4203 		mutex_lock(&rdev->srbm_mutex);
4204 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4205 		wptr = RREG32(CP_HQD_PQ_WPTR);
4206 		cik_srbm_select(rdev, 0, 0, 0, 0);
4207 		mutex_unlock(&rdev->srbm_mutex);
4208 	}
4209 
4210 	return wptr;
4211 }
4212 
4213 void cik_compute_set_wptr(struct radeon_device *rdev,
4214 			  struct radeon_ring *ring)
4215 {
4216 	/* XXX check if swapping is necessary on BE */
4217 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4218 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4219 }
4220 
4221 static void cik_compute_stop(struct radeon_device *rdev,
4222 			     struct radeon_ring *ring)
4223 {
4224 	u32 j, tmp;
4225 
4226 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4227 	/* Disable wptr polling. */
4228 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4229 	tmp &= ~WPTR_POLL_EN;
4230 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4231 	/* Disable HQD. */
4232 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4233 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4234 		for (j = 0; j < rdev->usec_timeout; j++) {
4235 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4236 				break;
4237 			udelay(1);
4238 		}
4239 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4240 		WREG32(CP_HQD_PQ_RPTR, 0);
4241 		WREG32(CP_HQD_PQ_WPTR, 0);
4242 	}
4243 	cik_srbm_select(rdev, 0, 0, 0, 0);
4244 }
4245 
4246 /**
4247  * cik_cp_compute_enable - enable/disable the compute CP MEs
4248  *
4249  * @rdev: radeon_device pointer
4250  * @enable: enable or disable the MEs
4251  *
4252  * Halts or unhalts the compute MEs.
4253  */
4254 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4255 {
4256 	if (enable)
4257 		WREG32(CP_MEC_CNTL, 0);
4258 	else {
4259 		/*
4260 		 * To make hibernation reliable we need to clear compute ring
4261 		 * configuration before halting the compute ring.
4262 		 */
4263 		mutex_lock(&rdev->srbm_mutex);
4264 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4265 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4266 		mutex_unlock(&rdev->srbm_mutex);
4267 
4268 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4269 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4270 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4271 	}
4272 	udelay(50);
4273 }
4274 
4275 /**
4276  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4277  *
4278  * @rdev: radeon_device pointer
4279  *
4280  * Loads the compute MEC1&2 ucode.
4281  * Returns 0 for success, -EINVAL if the ucode is not available.
4282  */
4283 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4284 {
4285 	int i;
4286 
4287 	if (!rdev->mec_fw)
4288 		return -EINVAL;
4289 
4290 	cik_cp_compute_enable(rdev, false);
4291 
4292 	if (rdev->new_fw) {
4293 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4294 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4295 		const __le32 *fw_data;
4296 		u32 fw_size;
4297 
4298 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4299 
4300 		/* MEC1 */
4301 		fw_data = (const __le32 *)
4302 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4303 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4304 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4305 		for (i = 0; i < fw_size; i++)
4306 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4307 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4308 
4309 		/* MEC2 */
4310 		if (rdev->family == CHIP_KAVERI) {
4311 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4312 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4313 
4314 			fw_data = (const __le32 *)
4315 				(rdev->mec2_fw->data +
4316 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4317 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4318 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4319 			for (i = 0; i < fw_size; i++)
4320 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4321 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4322 		}
4323 	} else {
4324 		const __be32 *fw_data;
4325 
4326 		/* MEC1 */
4327 		fw_data = (const __be32 *)rdev->mec_fw->data;
4328 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4329 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4330 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4331 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4332 
4333 		if (rdev->family == CHIP_KAVERI) {
4334 			/* MEC2 */
4335 			fw_data = (const __be32 *)rdev->mec_fw->data;
4336 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4337 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4338 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4339 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4340 		}
4341 	}
4342 
4343 	return 0;
4344 }
4345 
4346 /**
4347  * cik_cp_compute_start - start the compute queues
4348  *
4349  * @rdev: radeon_device pointer
4350  *
4351  * Enable the compute queues.
4352  * Returns 0 for success, error for failure.
4353  */
4354 static int cik_cp_compute_start(struct radeon_device *rdev)
4355 {
4356 	cik_cp_compute_enable(rdev, true);
4357 
4358 	return 0;
4359 }
4360 
4361 /**
4362  * cik_cp_compute_fini - stop the compute queues
4363  *
4364  * @rdev: radeon_device pointer
4365  *
4366  * Stop the compute queues and tear down the driver queue
4367  * info.
4368  */
4369 static void cik_cp_compute_fini(struct radeon_device *rdev)
4370 {
4371 	int i, idx, r;
4372 
4373 	cik_cp_compute_enable(rdev, false);
4374 
4375 	for (i = 0; i < 2; i++) {
4376 		if (i == 0)
4377 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4378 		else
4379 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4380 
4381 		if (rdev->ring[idx].mqd_obj) {
4382 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4383 			if (unlikely(r != 0))
4384 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4385 
4386 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4387 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4388 
4389 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4390 			rdev->ring[idx].mqd_obj = NULL;
4391 		}
4392 	}
4393 }
4394 
4395 static void cik_mec_fini(struct radeon_device *rdev)
4396 {
4397 	int r;
4398 
4399 	if (rdev->mec.hpd_eop_obj) {
4400 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4401 		if (unlikely(r != 0))
4402 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4403 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4404 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4405 
4406 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4407 		rdev->mec.hpd_eop_obj = NULL;
4408 	}
4409 }
4410 
4411 #define MEC_HPD_SIZE 2048
4412 
4413 static int cik_mec_init(struct radeon_device *rdev)
4414 {
4415 	int r;
4416 	u32 *hpd;
4417 
4418 	/*
4419 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4420 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4421 	 */
4422 	if (rdev->family == CHIP_KAVERI)
4423 		rdev->mec.num_mec = 2;
4424 	else
4425 		rdev->mec.num_mec = 1;
4426 	rdev->mec.num_pipe = 4;
4427 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4428 
4429 	if (rdev->mec.hpd_eop_obj == NULL) {
4430 		r = radeon_bo_create(rdev,
4431 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4432 				     PAGE_SIZE, true,
4433 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4434 				     &rdev->mec.hpd_eop_obj);
4435 		if (r) {
4436 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4437 			return r;
4438 		}
4439 	}
4440 
4441 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4442 	if (unlikely(r != 0)) {
4443 		cik_mec_fini(rdev);
4444 		return r;
4445 	}
4446 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4447 			  &rdev->mec.hpd_eop_gpu_addr);
4448 	if (r) {
4449 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4450 		cik_mec_fini(rdev);
4451 		return r;
4452 	}
4453 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4454 	if (r) {
4455 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4456 		cik_mec_fini(rdev);
4457 		return r;
4458 	}
4459 
4460 	/* clear memory.  Not sure if this is required or not */
4461 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4462 
4463 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4464 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4465 
4466 	return 0;
4467 }
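
/*
 * Worked numbers for the allocation above (editorial; the macro is a
 * hypothetical convenience): KV has 2 MECs * 4 pipes = 8 pipes, so the
 * HPD EOP buffer is 8 * MEC_HPD_SIZE * 2 = 32 KB; CI/KB halve that with
 * a single MEC.
 */
#define CIK_HPD_EOP_BYTES(num_mec)	((num_mec) * 4 * MEC_HPD_SIZE * 2)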
4468 
4469 struct hqd_registers
4470 {
4471 	u32 cp_mqd_base_addr;
4472 	u32 cp_mqd_base_addr_hi;
4473 	u32 cp_hqd_active;
4474 	u32 cp_hqd_vmid;
4475 	u32 cp_hqd_persistent_state;
4476 	u32 cp_hqd_pipe_priority;
4477 	u32 cp_hqd_queue_priority;
4478 	u32 cp_hqd_quantum;
4479 	u32 cp_hqd_pq_base;
4480 	u32 cp_hqd_pq_base_hi;
4481 	u32 cp_hqd_pq_rptr;
4482 	u32 cp_hqd_pq_rptr_report_addr;
4483 	u32 cp_hqd_pq_rptr_report_addr_hi;
4484 	u32 cp_hqd_pq_wptr_poll_addr;
4485 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4486 	u32 cp_hqd_pq_doorbell_control;
4487 	u32 cp_hqd_pq_wptr;
4488 	u32 cp_hqd_pq_control;
4489 	u32 cp_hqd_ib_base_addr;
4490 	u32 cp_hqd_ib_base_addr_hi;
4491 	u32 cp_hqd_ib_rptr;
4492 	u32 cp_hqd_ib_control;
4493 	u32 cp_hqd_iq_timer;
4494 	u32 cp_hqd_iq_rptr;
4495 	u32 cp_hqd_dequeue_request;
4496 	u32 cp_hqd_dma_offload;
4497 	u32 cp_hqd_sema_cmd;
4498 	u32 cp_hqd_msg_type;
4499 	u32 cp_hqd_atomic0_preop_lo;
4500 	u32 cp_hqd_atomic0_preop_hi;
4501 	u32 cp_hqd_atomic1_preop_lo;
4502 	u32 cp_hqd_atomic1_preop_hi;
4503 	u32 cp_hqd_hq_scheduler0;
4504 	u32 cp_hqd_hq_scheduler1;
4505 	u32 cp_mqd_control;
4506 };
4507 
4508 struct bonaire_mqd
4509 {
4510 	u32 header;
4511 	u32 dispatch_initiator;
4512 	u32 dimensions[3];
4513 	u32 start_idx[3];
4514 	u32 num_threads[3];
4515 	u32 pipeline_stat_enable;
4516 	u32 perf_counter_enable;
4517 	u32 pgm[2];
4518 	u32 tba[2];
4519 	u32 tma[2];
4520 	u32 pgm_rsrc[2];
4521 	u32 vmid;
4522 	u32 resource_limits;
4523 	u32 static_thread_mgmt01[2];
4524 	u32 tmp_ring_size;
4525 	u32 static_thread_mgmt23[2];
4526 	u32 restart[3];
4527 	u32 thread_trace_enable;
4528 	u32 reserved1;
4529 	u32 user_data[16];
4530 	u32 vgtcs_invoke_count[2];
4531 	struct hqd_registers queue_state;
4532 	u32 dequeue_cntr;
4533 	u32 interrupt_queue[64];
4534 };
4535 
4536 /**
4537  * cik_cp_compute_resume - setup the compute queue registers
4538  *
4539  * @rdev: radeon_device pointer
4540  *
4541  * Program the compute queues and test them to make sure they
4542  * are working.
4543  * Returns 0 for success, error for failure.
4544  */
4545 static int cik_cp_compute_resume(struct radeon_device *rdev)
4546 {
4547 	int r, i, j, idx;
4548 	u32 tmp;
4549 	bool use_doorbell = true;
4550 	u64 hqd_gpu_addr;
4551 	u64 mqd_gpu_addr;
4552 	u64 eop_gpu_addr;
4553 	u64 wb_gpu_addr;
4554 	u32 *buf;
4555 	struct bonaire_mqd *mqd;
4556 
4557 	r = cik_cp_compute_start(rdev);
4558 	if (r)
4559 		return r;
4560 
4561 	/* fix up chicken bits */
4562 	tmp = RREG32(CP_CPF_DEBUG);
4563 	tmp |= (1 << 23);
4564 	WREG32(CP_CPF_DEBUG, tmp);
4565 
4566 	/* init the pipes */
4567 	mutex_lock(&rdev->srbm_mutex);
4568 
4569 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4570 		int me = (i < 4) ? 1 : 2;
4571 		int pipe = (i < 4) ? i : (i - 4);
4572 
4573 		cik_srbm_select(rdev, me, pipe, 0, 0);
4574 
4575 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4576 		/* write the EOP addr */
4577 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4578 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4579 
4580 		/* set the VMID assigned */
4581 		WREG32(CP_HPD_EOP_VMID, 0);
4582 
4583 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4584 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4585 		tmp &= ~EOP_SIZE_MASK;
4586 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4587 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4588 
4589 	}
4590 	cik_srbm_select(rdev, 0, 0, 0, 0);
4591 	mutex_unlock(&rdev->srbm_mutex);
4592 
4593 	/* init the queues.  Just two for now. */
4594 	for (i = 0; i < 2; i++) {
4595 		if (i == 0)
4596 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4597 		else
4598 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4599 
4600 		if (rdev->ring[idx].mqd_obj == NULL) {
4601 			r = radeon_bo_create(rdev,
4602 					     sizeof(struct bonaire_mqd),
4603 					     PAGE_SIZE, true,
4604 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4605 					     NULL, &rdev->ring[idx].mqd_obj);
4606 			if (r) {
4607 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4608 				return r;
4609 			}
4610 		}
4611 
4612 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4613 		if (unlikely(r != 0)) {
4614 			cik_cp_compute_fini(rdev);
4615 			return r;
4616 		}
4617 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4618 				  &mqd_gpu_addr);
4619 		if (r) {
4620 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4621 			cik_cp_compute_fini(rdev);
4622 			return r;
4623 		}
4624 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4625 		if (r) {
4626 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4627 			cik_cp_compute_fini(rdev);
4628 			return r;
4629 		}
4630 
4631 		/* init the mqd struct */
4632 		memset(buf, 0, sizeof(struct bonaire_mqd));
4633 
4634 		mqd = (struct bonaire_mqd *)buf;
4635 		mqd->header = 0xC0310800;
4636 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4637 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4638 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4639 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4640 
4641 		mutex_lock(&rdev->srbm_mutex);
4642 		cik_srbm_select(rdev, rdev->ring[idx].me,
4643 				rdev->ring[idx].pipe,
4644 				rdev->ring[idx].queue, 0);
4645 
4646 		/* disable wptr polling */
4647 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4648 		tmp &= ~WPTR_POLL_EN;
4649 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4650 
4651 		/* enable doorbell? */
4652 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4653 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4654 		if (use_doorbell)
4655 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4656 		else
4657 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4658 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4659 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4660 
4661 		/* disable the queue if it's active */
4662 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4663 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4664 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4665 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4666 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4667 			for (j = 0; j < rdev->usec_timeout; j++) {
4668 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4669 					break;
4670 				udelay(1);
4671 			}
4672 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4673 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4674 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4675 		}
4676 
4677 		/* set the pointer to the MQD */
4678 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4679 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4680 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4681 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4682 		/* set MQD vmid to 0 */
4683 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4684 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4685 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4686 
4687 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4688 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4689 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4690 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4691 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4692 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4693 
4694 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4695 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4696 		mqd->queue_state.cp_hqd_pq_control &=
4697 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4698 
4699 		mqd->queue_state.cp_hqd_pq_control |=
4700 			order_base_2(rdev->ring[idx].ring_size / 8);
4701 		mqd->queue_state.cp_hqd_pq_control |=
4702 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4703 #ifdef __BIG_ENDIAN
4704 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4705 #endif
4706 		mqd->queue_state.cp_hqd_pq_control &=
4707 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4708 		mqd->queue_state.cp_hqd_pq_control |=
4709 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4710 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4711 
4712 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4713 		if (i == 0)
4714 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4715 		else
4716 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4717 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4718 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4719 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4720 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4721 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4722 
4723 		/* set the wb address whether it's enabled or not */
4724 		if (i == 0)
4725 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4726 		else
4727 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4728 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4729 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4730 			upper_32_bits(wb_gpu_addr) & 0xffff;
4731 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4732 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4733 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4734 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4735 
4736 		/* enable the doorbell if requested */
4737 		if (use_doorbell) {
4738 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4739 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4740 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4741 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4742 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4743 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4744 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4745 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4746 
4747 		} else {
4748 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4749 		}
4750 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4751 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4752 
4753 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4754 		rdev->ring[idx].wptr = 0;
4755 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4756 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4757 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4758 
4759 		/* set the vmid for the queue */
4760 		mqd->queue_state.cp_hqd_vmid = 0;
4761 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4762 
4763 		/* activate the queue */
4764 		mqd->queue_state.cp_hqd_active = 1;
4765 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4766 
4767 		cik_srbm_select(rdev, 0, 0, 0, 0);
4768 		mutex_unlock(&rdev->srbm_mutex);
4769 
4770 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4771 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4772 
4773 		rdev->ring[idx].ready = true;
4774 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4775 		if (r)
4776 			rdev->ring[idx].ready = false;
4777 	}
4778 
4779 	return 0;
4780 }
4781 
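/**
 * cik_cp_enable - enable/disable the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the command processors
 *
 * Enable or disable both the gfx and the compute
 * command processors (CIK).
 */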
4782 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4783 {
4784 	cik_cp_gfx_enable(rdev, enable);
4785 	cik_cp_compute_enable(rdev, enable);
4786 }
4787 
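/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Load the gfx and compute CP microcode images,
 * stopping at the first failure (CIK).
 * Returns 0 for success, error for failure.
 */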
4788 static int cik_cp_load_microcode(struct radeon_device *rdev)
4789 {
4790 	int r;
4791 
4792 	r = cik_cp_gfx_load_microcode(rdev);
4793 	if (r)
4794 		return r;
4795 	r = cik_cp_compute_load_microcode(rdev);
4796 	if (r)
4797 		return r;
4798 
4799 	return 0;
4800 }
4801 
4802 static void cik_cp_fini(struct radeon_device *rdev)
4803 {
4804 	cik_cp_gfx_fini(rdev);
4805 	cik_cp_compute_fini(rdev);
4806 }
4807 
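/**
 * cik_cp_resume - resume the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Load the CP microcode and restart the gfx and compute
 * rings; the GUI idle interrupt is masked while the CPs
 * are being brought up (CIK).
 * Returns 0 for success, error for failure.
 */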
4808 static int cik_cp_resume(struct radeon_device *rdev)
4809 {
4810 	int r;
4811 
4812 	cik_enable_gui_idle_interrupt(rdev, false);
4813 
4814 	r = cik_cp_load_microcode(rdev);
4815 	if (r)
4816 		return r;
4817 
4818 	r = cik_cp_gfx_resume(rdev);
4819 	if (r)
4820 		return r;
4821 	r = cik_cp_compute_resume(rdev);
4822 	if (r)
4823 		return r;
4824 
4825 	cik_enable_gui_idle_interrupt(rdev, true);
4826 
4827 	return 0;
4828 }
4829 
4830 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4831 {
4832 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4833 		RREG32(GRBM_STATUS));
4834 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4835 		RREG32(GRBM_STATUS2));
4836 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4837 		RREG32(GRBM_STATUS_SE0));
4838 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4839 		RREG32(GRBM_STATUS_SE1));
4840 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4841 		RREG32(GRBM_STATUS_SE2));
4842 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4843 		RREG32(GRBM_STATUS_SE3));
4844 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4845 		RREG32(SRBM_STATUS));
4846 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4847 		RREG32(SRBM_STATUS2));
4848 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4849 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4850 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4851 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4852 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4853 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4854 		 RREG32(CP_STALLED_STAT1));
4855 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4856 		 RREG32(CP_STALLED_STAT2));
4857 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4858 		 RREG32(CP_STALLED_STAT3));
4859 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4860 		 RREG32(CP_CPF_BUSY_STAT));
4861 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4862 		 RREG32(CP_CPF_STALLED_STAT1));
4863 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4864 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4865 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4866 		 RREG32(CP_CPC_STALLED_STAT1));
4867 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4868 }
4869 
4870 /**
4871  * cik_gpu_check_soft_reset - check which blocks are busy
4872  *
4873  * @rdev: radeon_device pointer
4874  *
4875  * Check which blocks are busy and return the relevant reset
4876  * mask to be used by cik_gpu_soft_reset().
4877  * Returns a mask of the blocks to be reset.
4878  */
4879 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4880 {
4881 	u32 reset_mask = 0;
4882 	u32 tmp;
4883 
4884 	/* GRBM_STATUS */
4885 	tmp = RREG32(GRBM_STATUS);
4886 	if (tmp & (PA_BUSY | SC_BUSY |
4887 		   BCI_BUSY | SX_BUSY |
4888 		   TA_BUSY | VGT_BUSY |
4889 		   DB_BUSY | CB_BUSY |
4890 		   GDS_BUSY | SPI_BUSY |
4891 		   IA_BUSY | IA_BUSY_NO_DMA))
4892 		reset_mask |= RADEON_RESET_GFX;
4893 
4894 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4895 		reset_mask |= RADEON_RESET_CP;
4896 
4897 	/* GRBM_STATUS2 */
4898 	tmp = RREG32(GRBM_STATUS2);
4899 	if (tmp & RLC_BUSY)
4900 		reset_mask |= RADEON_RESET_RLC;
4901 
4902 	/* SDMA0_STATUS_REG */
4903 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4904 	if (!(tmp & SDMA_IDLE))
4905 		reset_mask |= RADEON_RESET_DMA;
4906 
4907 	/* SDMA1_STATUS_REG */
4908 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4909 	if (!(tmp & SDMA_IDLE))
4910 		reset_mask |= RADEON_RESET_DMA1;
4911 
4912 	/* SRBM_STATUS2 */
4913 	tmp = RREG32(SRBM_STATUS2);
4914 	if (tmp & SDMA_BUSY)
4915 		reset_mask |= RADEON_RESET_DMA;
4916 
4917 	if (tmp & SDMA1_BUSY)
4918 		reset_mask |= RADEON_RESET_DMA1;
4919 
4920 	/* SRBM_STATUS */
4921 	tmp = RREG32(SRBM_STATUS);
4922 
4923 	if (tmp & IH_BUSY)
4924 		reset_mask |= RADEON_RESET_IH;
4925 
4926 	if (tmp & SEM_BUSY)
4927 		reset_mask |= RADEON_RESET_SEM;
4928 
4929 	if (tmp & GRBM_RQ_PENDING)
4930 		reset_mask |= RADEON_RESET_GRBM;
4931 
4932 	if (tmp & VMC_BUSY)
4933 		reset_mask |= RADEON_RESET_VMC;
4934 
4935 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4936 		   MCC_BUSY | MCD_BUSY))
4937 		reset_mask |= RADEON_RESET_MC;
4938 
4939 	if (evergreen_is_display_hung(rdev))
4940 		reset_mask |= RADEON_RESET_DISPLAY;
4941 
4942 	/* Skip MC reset as it's most likely not hung, just busy */
4943 	if (reset_mask & RADEON_RESET_MC) {
4944 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4945 		reset_mask &= ~RADEON_RESET_MC;
4946 	}
4947 
4948 	return reset_mask;
4949 }
4950 
4951 /**
4952  * cik_gpu_soft_reset - soft reset GPU
4953  *
4954  * @rdev: radeon_device pointer
4955  * @reset_mask: mask of which blocks to reset
4956  *
4957  * Soft reset the blocks specified in @reset_mask.
4958  */
4959 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4960 {
4961 	struct evergreen_mc_save save;
4962 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4963 	u32 tmp;
4964 
4965 	if (reset_mask == 0)
4966 		return;
4967 
4968 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4969 
4970 	cik_print_gpu_status_regs(rdev);
4971 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4972 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4973 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4974 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4975 
4976 	/* disable CG/PG */
4977 	cik_fini_pg(rdev);
4978 	cik_fini_cg(rdev);
4979 
4980 	/* stop the rlc */
4981 	cik_rlc_stop(rdev);
4982 
4983 	/* Disable GFX parsing/prefetching */
4984 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4985 
4986 	/* Disable MEC parsing/prefetching */
4987 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4988 
4989 	if (reset_mask & RADEON_RESET_DMA) {
4990 		/* sdma0 */
4991 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4992 		tmp |= SDMA_HALT;
4993 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4994 	}
4995 	if (reset_mask & RADEON_RESET_DMA1) {
4996 		/* sdma1 */
4997 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4998 		tmp |= SDMA_HALT;
4999 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5000 	}
5001 
5002 	evergreen_mc_stop(rdev, &save);
5003 	if (evergreen_mc_wait_for_idle(rdev)) {
5004 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5005 	}
5006 
5007 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5008 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5009 
5010 	if (reset_mask & RADEON_RESET_CP) {
5011 		grbm_soft_reset |= SOFT_RESET_CP;
5012 
5013 		srbm_soft_reset |= SOFT_RESET_GRBM;
5014 	}
5015 
5016 	if (reset_mask & RADEON_RESET_DMA)
5017 		srbm_soft_reset |= SOFT_RESET_SDMA;
5018 
5019 	if (reset_mask & RADEON_RESET_DMA1)
5020 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5021 
5022 	if (reset_mask & RADEON_RESET_DISPLAY)
5023 		srbm_soft_reset |= SOFT_RESET_DC;
5024 
5025 	if (reset_mask & RADEON_RESET_RLC)
5026 		grbm_soft_reset |= SOFT_RESET_RLC;
5027 
5028 	if (reset_mask & RADEON_RESET_SEM)
5029 		srbm_soft_reset |= SOFT_RESET_SEM;
5030 
5031 	if (reset_mask & RADEON_RESET_IH)
5032 		srbm_soft_reset |= SOFT_RESET_IH;
5033 
5034 	if (reset_mask & RADEON_RESET_GRBM)
5035 		srbm_soft_reset |= SOFT_RESET_GRBM;
5036 
5037 	if (reset_mask & RADEON_RESET_VMC)
5038 		srbm_soft_reset |= SOFT_RESET_VMC;
5039 
5040 	if (!(rdev->flags & RADEON_IS_IGP)) {
5041 		if (reset_mask & RADEON_RESET_MC)
5042 			srbm_soft_reset |= SOFT_RESET_MC;
5043 	}
5044 
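	/* Pulse the accumulated reset bits: set them, read the register
	 * back to post the write, wait ~50us, then clear them again.
	 */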
5045 	if (grbm_soft_reset) {
5046 		tmp = RREG32(GRBM_SOFT_RESET);
5047 		tmp |= grbm_soft_reset;
5048 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5049 		WREG32(GRBM_SOFT_RESET, tmp);
5050 		tmp = RREG32(GRBM_SOFT_RESET);
5051 
5052 		udelay(50);
5053 
5054 		tmp &= ~grbm_soft_reset;
5055 		WREG32(GRBM_SOFT_RESET, tmp);
5056 		tmp = RREG32(GRBM_SOFT_RESET);
5057 	}
5058 
5059 	if (srbm_soft_reset) {
5060 		tmp = RREG32(SRBM_SOFT_RESET);
5061 		tmp |= srbm_soft_reset;
5062 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5063 		WREG32(SRBM_SOFT_RESET, tmp);
5064 		tmp = RREG32(SRBM_SOFT_RESET);
5065 
5066 		udelay(50);
5067 
5068 		tmp &= ~srbm_soft_reset;
5069 		WREG32(SRBM_SOFT_RESET, tmp);
5070 		tmp = RREG32(SRBM_SOFT_RESET);
5071 	}
5072 
5073 	/* Wait a little for things to settle down */
5074 	udelay(50);
5075 
5076 	evergreen_mc_resume(rdev, &save);
5077 	udelay(50);
5078 
5079 	cik_print_gpu_status_regs(rdev);
5080 }
5081 
5082 struct kv_reset_save_regs {
5083 	u32 gmcon_reng_execute;
5084 	u32 gmcon_misc;
5085 	u32 gmcon_misc3;
5086 };
5087 
5088 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5089 				   struct kv_reset_save_regs *save)
5090 {
5091 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5092 	save->gmcon_misc = RREG32(GMCON_MISC);
5093 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5094 
5095 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5096 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5097 						STCTRL_STUTTER_EN));
5098 }
5099 
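/* Step the GMCON power-gating FSM through its restore sequence.
 * The PGFSM_CONFIG/PGFSM_WRITE values are opaque, hardware-specific
 * magic numbers; the five dummy writes between stages appear to be
 * needed for the state machine to latch each entry.
 */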
5100 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5101 				      struct kv_reset_save_regs *save)
5102 {
5103 	int i;
5104 
5105 	WREG32(GMCON_PGFSM_WRITE, 0);
5106 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5107 
5108 	for (i = 0; i < 5; i++)
5109 		WREG32(GMCON_PGFSM_WRITE, 0);
5110 
5111 	WREG32(GMCON_PGFSM_WRITE, 0);
5112 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5113 
5114 	for (i = 0; i < 5; i++)
5115 		WREG32(GMCON_PGFSM_WRITE, 0);
5116 
5117 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5118 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5119 
5120 	for (i = 0; i < 5; i++)
5121 		WREG32(GMCON_PGFSM_WRITE, 0);
5122 
5123 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5124 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5125 
5126 	for (i = 0; i < 5; i++)
5127 		WREG32(GMCON_PGFSM_WRITE, 0);
5128 
5129 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5130 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5131 
5132 	for (i = 0; i < 5; i++)
5133 		WREG32(GMCON_PGFSM_WRITE, 0);
5134 
5135 	WREG32(GMCON_PGFSM_WRITE, 0);
5136 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5137 
5138 	for (i = 0; i < 5; i++)
5139 		WREG32(GMCON_PGFSM_WRITE, 0);
5140 
5141 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5142 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5143 
5144 	for (i = 0; i < 5; i++)
5145 		WREG32(GMCON_PGFSM_WRITE, 0);
5146 
5147 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5148 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5149 
5150 	for (i = 0; i < 5; i++)
5151 		WREG32(GMCON_PGFSM_WRITE, 0);
5152 
5153 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5154 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5155 
5156 	for (i = 0; i < 5; i++)
5157 		WREG32(GMCON_PGFSM_WRITE, 0);
5158 
5159 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5160 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5161 
5162 	for (i = 0; i < 5; i++)
5163 		WREG32(GMCON_PGFSM_WRITE, 0);
5164 
5165 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5166 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5167 
5168 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5169 	WREG32(GMCON_MISC, save->gmcon_misc);
5170 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5171 }
5172 
5173 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5174 {
5175 	struct evergreen_mc_save save;
5176 	struct kv_reset_save_regs kv_save = { 0 };
5177 	u32 tmp, i;
5178 
5179 	dev_info(rdev->dev, "GPU pci config reset\n");
5180 
5181 	/* disable dpm? */
5182 
5183 	/* disable cg/pg */
5184 	cik_fini_pg(rdev);
5185 	cik_fini_cg(rdev);
5186 
5187 	/* Disable GFX parsing/prefetching */
5188 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5189 
5190 	/* Disable MEC parsing/prefetching */
5191 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5192 
5193 	/* sdma0 */
5194 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5195 	tmp |= SDMA_HALT;
5196 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5197 	/* sdma1 */
5198 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5199 	tmp |= SDMA_HALT;
5200 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5201 	/* XXX other engines? */
5202 
5203 	/* halt the rlc, disable cp internal ints */
5204 	cik_rlc_stop(rdev);
5205 
5206 	udelay(50);
5207 
5208 	/* disable mem access */
5209 	evergreen_mc_stop(rdev, &save);
5210 	if (evergreen_mc_wait_for_idle(rdev)) {
5211 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5212 	}
5213 
5214 	if (rdev->flags & RADEON_IS_IGP)
5215 		kv_save_regs_for_reset(rdev, &kv_save);
5216 
5217 	/* disable BM */
5218 	pci_clear_master(rdev->pdev);
5219 	/* reset */
5220 	radeon_pci_config_reset(rdev);
5221 
5222 	udelay(100);
5223 
5224 	/* wait for asic to come out of reset */
5225 	for (i = 0; i < rdev->usec_timeout; i++) {
5226 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5227 			break;
5228 		udelay(1);
5229 	}
5230 
5231 	/* does asic init need to be run first??? */
5232 	if (rdev->flags & RADEON_IS_IGP)
5233 		kv_restore_regs_for_reset(rdev, &kv_save);
5234 }
5235 
5236 /**
5237  * cik_asic_reset - soft reset GPU
5238  *
5239  * @rdev: radeon_device pointer
5240  * @hard: force hard reset
5241  *
5242  * Look up which blocks are hung and attempt
5243  * to reset them.
5244  * Returns 0 for success.
5245  */
5246 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5247 {
5248 	u32 reset_mask;
5249 
5250 	if (hard) {
5251 		cik_gpu_pci_config_reset(rdev);
5252 		return 0;
5253 	}
5254 
5255 	reset_mask = cik_gpu_check_soft_reset(rdev);
5256 
5257 	if (reset_mask)
5258 		r600_set_bios_scratch_engine_hung(rdev, true);
5259 
5260 	/* try soft reset */
5261 	cik_gpu_soft_reset(rdev, reset_mask);
5262 
5263 	reset_mask = cik_gpu_check_soft_reset(rdev);
5264 
5265 	/* try pci config reset */
5266 	if (reset_mask && radeon_hard_reset)
5267 		cik_gpu_pci_config_reset(rdev);
5268 
5269 	reset_mask = cik_gpu_check_soft_reset(rdev);
5270 
5271 	if (!reset_mask)
5272 		r600_set_bios_scratch_engine_hung(rdev, false);
5273 
5274 	return 0;
5275 }
5276 
5277 /**
5278  * cik_gfx_is_lockup - check if the 3D engine is locked up
5279  *
5280  * @rdev: radeon_device pointer
5281  * @ring: radeon_ring structure holding ring information
5282  *
5283  * Check if the 3D engine is locked up (CIK).
5284  * Returns true if the engine is locked, false if not.
5285  */
5286 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5287 {
5288 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5289 
5290 	if (!(reset_mask & (RADEON_RESET_GFX |
5291 			    RADEON_RESET_COMPUTE |
5292 			    RADEON_RESET_CP))) {
5293 		radeon_ring_lockup_update(rdev, ring);
5294 		return false;
5295 	}
5296 	return radeon_ring_test_lockup(rdev, ring);
5297 }
5298 
5299 /* MC */
5300 /**
5301  * cik_mc_program - program the GPU memory controller
5302  *
5303  * @rdev: radeon_device pointer
5304  *
5305  * Set the location of vram, gart, and AGP in the GPU's
5306  * physical address space (CIK).
5307  */
5308 static void cik_mc_program(struct radeon_device *rdev)
5309 {
5310 	struct evergreen_mc_save save;
5311 	u32 tmp;
5312 	int i, j;
5313 
5314 	/* Initialize HDP */
5315 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5316 		WREG32((0x2c14 + j), 0x00000000);
5317 		WREG32((0x2c18 + j), 0x00000000);
5318 		WREG32((0x2c1c + j), 0x00000000);
5319 		WREG32((0x2c20 + j), 0x00000000);
5320 		WREG32((0x2c24 + j), 0x00000000);
5321 	}
5322 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5323 
5324 	evergreen_mc_stop(rdev, &save);
5325 	if (radeon_mc_wait_for_idle(rdev)) {
5326 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5327 	}
5328 	/* Lock out access through the VGA aperture */
5329 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5330 	/* Update configuration */
5331 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5332 	       rdev->mc.vram_start >> 12);
5333 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5334 	       rdev->mc.vram_end >> 12);
5335 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5336 	       rdev->vram_scratch.gpu_addr >> 12);
5337 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5338 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5339 	WREG32(MC_VM_FB_LOCATION, tmp);
5340 	/* XXX double check these! */
5341 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5342 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5343 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5344 	WREG32(MC_VM_AGP_BASE, 0);
5345 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5346 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5347 	if (radeon_mc_wait_for_idle(rdev)) {
5348 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5349 	}
5350 	evergreen_mc_resume(rdev, &save);
5351 	/* we need to own VRAM, so turn off the VGA renderer here
5352 	 * to stop it from overwriting our objects */
5353 	rv515_vga_render_disable(rdev);
5354 }
5355 
5356 /**
5357  * cik_mc_init - initialize the memory controller driver params
5358  *
5359  * @rdev: radeon_device pointer
5360  *
5361  * Look up the amount of vram, vram width, and decide how to place
5362  * vram and gart within the GPU's physical address space (CIK).
5363  * Returns 0 for success.
5364  */
5365 static int cik_mc_init(struct radeon_device *rdev)
5366 {
5367 	u32 tmp;
5368 	int chansize, numchan;
5369 
5370 	/* Get VRAM information */
5371 	rdev->mc.vram_is_ddr = true;
5372 	tmp = RREG32(MC_ARB_RAMCFG);
5373 	if (tmp & CHANSIZE_MASK) {
5374 		chansize = 64;
5375 	} else {
5376 		chansize = 32;
5377 	}
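	/* NOOFCHAN is an encoded field, not a raw count; decode it
	 * into the actual number of memory channels.
	 */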
5378 	tmp = RREG32(MC_SHARED_CHMAP);
5379 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5380 	case 0:
5381 	default:
5382 		numchan = 1;
5383 		break;
5384 	case 1:
5385 		numchan = 2;
5386 		break;
5387 	case 2:
5388 		numchan = 4;
5389 		break;
5390 	case 3:
5391 		numchan = 8;
5392 		break;
5393 	case 4:
5394 		numchan = 3;
5395 		break;
5396 	case 5:
5397 		numchan = 6;
5398 		break;
5399 	case 6:
5400 		numchan = 10;
5401 		break;
5402 	case 7:
5403 		numchan = 12;
5404 		break;
5405 	case 8:
5406 		numchan = 16;
5407 		break;
5408 	}
5409 	rdev->mc.vram_width = numchan * chansize;
5410 	/* Could the aperture size report 0? */
5411 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5412 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5413 	/* CONFIG_MEMSIZE reports the size in MB */
5414 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5415 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5416 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5417 	si_vram_gtt_location(rdev, &rdev->mc);
5418 	radeon_update_bandwidth_info(rdev);
5419 
5420 	return 0;
5421 }
5422 
5423 /*
5424  * GART
5425  * VMID 0 is the physical GPU address space as used by the kernel.
5426  * VMIDs 1-15 are used for userspace clients and are handled
5427  * by the radeon vm/hsa code.
5428  */
5429 /**
5430  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5431  *
5432  * @rdev: radeon_device pointer
5433  *
5434  * Flush the TLB for the VMID 0 page table (CIK).
5435  */
5436 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5437 {
5438 	/* flush hdp cache */
5439 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5440 
5441 	/* bits 0-15 are the VM contexts 0-15 */
5442 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5443 }
5444 
5445 /**
5446  * cik_pcie_gart_enable - gart enable
5447  *
5448  * @rdev: radeon_device pointer
5449  *
5450  * This sets up the TLBs, programs the page tables for VMID0,
5451  * sets up the hw for VMIDs 1-15 which are allocated on
5452  * demand, and sets up the global locations for the LDS, GDS,
5453  * and GPUVM for FSA64 clients (CIK).
5454  * Returns 0 for success, errors for failure.
5455  */
5456 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5457 {
5458 	int r, i;
5459 
5460 	if (rdev->gart.robj == NULL) {
5461 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5462 		return -EINVAL;
5463 	}
5464 	r = radeon_gart_table_vram_pin(rdev);
5465 	if (r)
5466 		return r;
5467 	/* Setup TLB control */
5468 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5469 	       (0xA << 7) |
5470 	       ENABLE_L1_TLB |
5471 	       ENABLE_L1_FRAGMENT_PROCESSING |
5472 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5473 	       ENABLE_ADVANCED_DRIVER_MODEL |
5474 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5475 	/* Setup L2 cache */
5476 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5477 	       ENABLE_L2_FRAGMENT_PROCESSING |
5478 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5479 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5480 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5481 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5482 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5483 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5484 	       BANK_SELECT(4) |
5485 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5486 	/* setup context0 */
5487 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5488 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5489 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5490 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5491 			(u32)(rdev->dummy_page.addr >> 12));
5492 	WREG32(VM_CONTEXT0_CNTL2, 0);
5493 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5494 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5495 
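	/* these three registers have no symbolic names in cikd.h;
	 * they are cleared here as part of the context0 setup.
	 */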
5496 	WREG32(0x15D4, 0);
5497 	WREG32(0x15D8, 0);
5498 	WREG32(0x15DC, 0);
5499 
5500 	/* restore contexts 1-15 */
5501 	/* set vm size, must be a multiple of 4 */
5502 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5503 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5504 	for (i = 1; i < 16; i++) {
5505 		if (i < 8)
5506 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5507 			       rdev->vm_manager.saved_table_addr[i]);
5508 		else
5509 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5510 			       rdev->vm_manager.saved_table_addr[i]);
5511 	}
5512 
5513 	/* enable contexts 1-15 */
5514 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5515 	       (u32)(rdev->dummy_page.addr >> 12));
5516 	WREG32(VM_CONTEXT1_CNTL2, 4);
5517 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5518 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5519 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5520 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5521 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5522 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5523 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5524 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5525 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5526 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5527 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5528 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5529 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5530 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5531 
5532 	if (rdev->family == CHIP_KAVERI) {
5533 		u32 tmp = RREG32(CHUB_CONTROL);
5534 		tmp &= ~BYPASS_VM;
5535 		WREG32(CHUB_CONTROL, tmp);
5536 	}
5537 
5538 	/* XXX SH_MEM regs */
5539 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5540 	mutex_lock(&rdev->srbm_mutex);
5541 	for (i = 0; i < 16; i++) {
5542 		cik_srbm_select(rdev, 0, 0, 0, i);
5543 		/* CP and shaders */
5544 		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5545 		WREG32(SH_MEM_APE1_BASE, 1);
5546 		WREG32(SH_MEM_APE1_LIMIT, 0);
5547 		WREG32(SH_MEM_BASES, 0);
5548 		/* SDMA GFX */
5549 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5550 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5551 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5552 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5553 		/* XXX SDMA RLC - todo */
5554 	}
5555 	cik_srbm_select(rdev, 0, 0, 0, 0);
5556 	mutex_unlock(&rdev->srbm_mutex);
5557 
5558 	cik_pcie_gart_tlb_flush(rdev);
5559 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5560 		 (unsigned)(rdev->mc.gtt_size >> 20),
5561 		 (unsigned long long)rdev->gart.table_addr);
5562 	rdev->gart.ready = true;
5563 	return 0;
5564 }
5565 
5566 /**
5567  * cik_pcie_gart_disable - gart disable
5568  *
5569  * @rdev: radeon_device pointer
5570  *
5571  * This disables all VM page tables (CIK).
5572  */
5573 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5574 {
5575 	unsigned i;
5576 
5577 	for (i = 1; i < 16; ++i) {
5578 		uint32_t reg;
5579 		if (i < 8)
5580 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5581 		else
5582 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5583 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5584 	}
5585 
5586 	/* Disable all tables */
5587 	WREG32(VM_CONTEXT0_CNTL, 0);
5588 	WREG32(VM_CONTEXT1_CNTL, 0);
5589 	/* Setup TLB control */
5590 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5591 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5592 	/* Setup L2 cache */
5593 	WREG32(VM_L2_CNTL,
5594 	       ENABLE_L2_FRAGMENT_PROCESSING |
5595 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5596 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5597 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5598 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5599 	WREG32(VM_L2_CNTL2, 0);
5600 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5601 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5602 	radeon_gart_table_vram_unpin(rdev);
5603 }
5604 
5605 /**
5606  * cik_pcie_gart_fini - vm fini callback
5607  *
5608  * @rdev: radeon_device pointer
5609  *
5610  * Tears down the driver GART/VM setup (CIK).
5611  */
5612 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5613 {
5614 	cik_pcie_gart_disable(rdev);
5615 	radeon_gart_table_vram_free(rdev);
5616 	radeon_gart_fini(rdev);
5617 }
5618 
5619 /* vm parser */
5620 /**
5621  * cik_ib_parse - vm ib_parse callback
5622  *
5623  * @rdev: radeon_device pointer
5624  * @ib: indirect buffer pointer
5625  *
5626  * CIK uses hw IB checking so this is a nop (CIK).
5627  */
5628 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5629 {
5630 	return 0;
5631 }
5632 
5633 /*
5634  * vm
5635  * VMID 0 is the physical GPU address space as used by the kernel.
5636  * VMIDs 1-15 are used for userspace clients and are handled
5637  * by the radeon vm/hsa code.
5638  */
5639 /**
5640  * cik_vm_init - cik vm init callback
5641  *
5642  * @rdev: radeon_device pointer
5643  *
5644  * Inits cik specific vm parameters (number of VMs, base of vram for
5645  * VMIDs 1-15) (CIK).
5646  * Returns 0 for success.
5647  */
5648 int cik_vm_init(struct radeon_device *rdev)
5649 {
5650 	/*
5651 	 * number of VMs
5652 	 * VMID 0 is reserved for System
5653 	 * radeon graphics/compute will use VMIDs 1-15
5654 	 */
5655 	rdev->vm_manager.nvm = 16;
5656 	/* base offset of vram pages */
5657 	if (rdev->flags & RADEON_IS_IGP) {
5658 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5659 		tmp <<= 22;
5660 		rdev->vm_manager.vram_base_offset = tmp;
5661 	} else
5662 		rdev->vm_manager.vram_base_offset = 0;
5663 
5664 	return 0;
5665 }
5666 
5667 /**
5668  * cik_vm_fini - cik vm fini callback
5669  *
5670  * @rdev: radeon_device pointer
5671  *
5672  * Tear down any asic specific VM setup (CIK).
5673  */
5674 void cik_vm_fini(struct radeon_device *rdev)
5675 {
5676 }
5677 
5678 /**
5679  * cik_vm_decode_fault - print human readable fault info
5680  *
5681  * @rdev: radeon_device pointer
5682  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5683  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5684  *
5685  * Print human readable fault information (CIK).
5686  */
5687 static void cik_vm_decode_fault(struct radeon_device *rdev,
5688 				u32 status, u32 addr, u32 mc_client)
5689 {
5690 	u32 mc_id;
5691 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5692 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5693 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5694 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5695 
5696 	if (rdev->family == CHIP_HAWAII)
5697 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5698 	else
5699 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5700 
5701 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5702 	       protections, vmid, addr,
5703 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5704 	       block, mc_client, mc_id);
5705 }
5706 
5707 /**
5708  * cik_vm_flush - cik vm flush using the CP
5709  *
5710  * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 * @vm_id: VMID of the page tables to flush
 * @pd_addr: address of the new page directory base
5711  *
5712  * Update the page table base and flush the VM TLB
5713  * using the CP (CIK).
5714  */
5715 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5716 		  unsigned vm_id, uint64_t pd_addr)
5717 {
5718 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5719 
5720 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5721 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5722 				 WRITE_DATA_DST_SEL(0)));
5723 	if (vm_id < 8) {
5724 		radeon_ring_write(ring,
5725 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5726 	} else {
5727 		radeon_ring_write(ring,
5728 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5729 	}
5730 	radeon_ring_write(ring, 0);
5731 	radeon_ring_write(ring, pd_addr >> 12);
5732 
5733 	/* update SH_MEM_* regs */
5734 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5735 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5736 				 WRITE_DATA_DST_SEL(0)));
5737 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5738 	radeon_ring_write(ring, 0);
5739 	radeon_ring_write(ring, VMID(vm_id));
5740 
5741 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5742 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5743 				 WRITE_DATA_DST_SEL(0)));
5744 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5745 	radeon_ring_write(ring, 0);
5746 
5747 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5748 	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5749 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5750 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5751 
5752 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5753 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5754 				 WRITE_DATA_DST_SEL(0)));
5755 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5756 	radeon_ring_write(ring, 0);
5757 	radeon_ring_write(ring, VMID(0));
5758 
5759 	/* HDP flush */
5760 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5761 
5762 	/* bits 0-15 are the VM contexts 0-15 */
5763 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5764 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5765 				 WRITE_DATA_DST_SEL(0)));
5766 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5767 	radeon_ring_write(ring, 0);
5768 	radeon_ring_write(ring, 1 << vm_id);
5769 
5770 	/* wait for the invalidate to complete */
5771 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5772 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5773 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5774 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5775 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5776 	radeon_ring_write(ring, 0);
5777 	radeon_ring_write(ring, 0); /* ref */
5778 	radeon_ring_write(ring, 0); /* mask */
5779 	radeon_ring_write(ring, 0x20); /* poll interval */
5780 
5781 	/* compute doesn't have PFP */
5782 	if (usepfp) {
5783 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5784 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5785 		radeon_ring_write(ring, 0x0);
5786 	}
5787 }
5788 
5789 /*
5790  * RLC
5791  * The RLC is a multi-purpose microengine that handles a
5792  * variety of functions, the most important of which is
5793  * the interrupt controller.
5794  */
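/**
 * cik_enable_gui_idle_interrupt - enable/disable the GUI idle interrupt
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the interrupt
 *
 * Toggle the context busy/empty interrupt sources in
 * CP_INT_CNTL_RING0 (CIK).
 */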
5795 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5796 					  bool enable)
5797 {
5798 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5799 
5800 	if (enable)
5801 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5802 	else
5803 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5804 	WREG32(CP_INT_CNTL_RING0, tmp);
5805 }
5806 
5807 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5808 {
5809 	u32 tmp;
5810 
5811 	tmp = RREG32(RLC_LB_CNTL);
5812 	if (enable)
5813 		tmp |= LOAD_BALANCE_ENABLE;
5814 	else
5815 		tmp &= ~LOAD_BALANCE_ENABLE;
5816 	WREG32(RLC_LB_CNTL, tmp);
5817 }
5818 
5819 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5820 {
5821 	u32 i, j, k;
5822 	u32 mask;
5823 
5824 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5825 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5826 			cik_select_se_sh(rdev, i, j);
5827 			for (k = 0; k < rdev->usec_timeout; k++) {
5828 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5829 					break;
5830 				udelay(1);
5831 			}
5832 		}
5833 	}
5834 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5835 
5836 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5837 	for (k = 0; k < rdev->usec_timeout; k++) {
5838 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5839 			break;
5840 		udelay(1);
5841 	}
5842 }
5843 
5844 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5845 {
5846 	u32 tmp;
5847 
5848 	tmp = RREG32(RLC_CNTL);
5849 	if (tmp != rlc)
5850 		WREG32(RLC_CNTL, rlc);
5851 }
5852 
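/**
 * cik_halt_rlc - halt the RLC if it is running
 *
 * @rdev: radeon_device pointer
 *
 * Disable the RLC, wait for the GPM block and the serdes
 * masters to go idle, and return the original RLC_CNTL value
 * so the caller can restore it with cik_update_rlc() (CIK).
 */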
5853 static u32 cik_halt_rlc(struct radeon_device *rdev)
5854 {
5855 	u32 data, orig;
5856 
5857 	orig = data = RREG32(RLC_CNTL);
5858 
5859 	if (data & RLC_ENABLE) {
5860 		u32 i;
5861 
5862 		data &= ~RLC_ENABLE;
5863 		WREG32(RLC_CNTL, data);
5864 
5865 		for (i = 0; i < rdev->usec_timeout; i++) {
5866 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5867 				break;
5868 			udelay(1);
5869 		}
5870 
5871 		cik_wait_for_rlc_serdes(rdev);
5872 	}
5873 
5874 	return orig;
5875 }
5876 
5877 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5878 {
5879 	u32 tmp, i, mask;
5880 
5881 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5882 	WREG32(RLC_GPR_REG2, tmp);
5883 
5884 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5885 	for (i = 0; i < rdev->usec_timeout; i++) {
5886 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5887 			break;
5888 		udelay(1);
5889 	}
5890 
5891 	for (i = 0; i < rdev->usec_timeout; i++) {
5892 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5893 			break;
5894 		udelay(1);
5895 	}
5896 }
5897 
5898 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5899 {
5900 	u32 tmp;
5901 
5902 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5903 	WREG32(RLC_GPR_REG2, tmp);
5904 }
5905 
5906 /**
5907  * cik_rlc_stop - stop the RLC ME
5908  *
5909  * @rdev: radeon_device pointer
5910  *
5911  * Halt the RLC ME (MicroEngine) (CIK).
5912  */
5913 static void cik_rlc_stop(struct radeon_device *rdev)
5914 {
5915 	WREG32(RLC_CNTL, 0);
5916 
5917 	cik_enable_gui_idle_interrupt(rdev, false);
5918 
5919 	cik_wait_for_rlc_serdes(rdev);
5920 }
5921 
5922 /**
5923  * cik_rlc_start - start the RLC ME
5924  *
5925  * @rdev: radeon_device pointer
5926  *
5927  * Unhalt the RLC ME (MicroEngine) (CIK).
5928  */
5929 static void cik_rlc_start(struct radeon_device *rdev)
5930 {
5931 	WREG32(RLC_CNTL, RLC_ENABLE);
5932 
5933 	cik_enable_gui_idle_interrupt(rdev, true);
5934 
5935 	udelay(50);
5936 }
5937 
5938 /**
5939  * cik_rlc_resume - setup the RLC hw
5940  *
5941  * @rdev: radeon_device pointer
5942  *
5943  * Initialize the RLC registers, load the ucode,
5944  * and start the RLC (CIK).
5945  * Returns 0 for success, -EINVAL if the ucode is not available.
5946  */
5947 static int cik_rlc_resume(struct radeon_device *rdev)
5948 {
5949 	u32 i, size, tmp;
5950 
5951 	if (!rdev->rlc_fw)
5952 		return -EINVAL;
5953 
5954 	cik_rlc_stop(rdev);
5955 
5956 	/* disable CG */
5957 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5958 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5959 
5960 	si_rlc_reset(rdev);
5961 
5962 	cik_init_pg(rdev);
5963 
5964 	cik_init_cg(rdev);
5965 
5966 	WREG32(RLC_LB_CNTR_INIT, 0);
5967 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5968 
5969 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5970 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5971 	WREG32(RLC_LB_PARAMS, 0x00600408);
5972 	WREG32(RLC_LB_CNTL, 0x80000004);
5973 
5974 	WREG32(RLC_MC_CNTL, 0);
5975 	WREG32(RLC_UCODE_CNTL, 0);
5976 
5977 	if (rdev->new_fw) {
5978 		const struct rlc_firmware_header_v1_0 *hdr =
5979 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5980 		const __le32 *fw_data = (const __le32 *)
5981 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5982 
5983 		radeon_ucode_print_rlc_hdr(&hdr->header);
5984 
5985 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5986 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5987 		for (i = 0; i < size; i++)
5988 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5989 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5990 	} else {
5991 		const __be32 *fw_data;
5992 
5993 		switch (rdev->family) {
5994 		case CHIP_BONAIRE:
5995 		case CHIP_HAWAII:
5996 		default:
5997 			size = BONAIRE_RLC_UCODE_SIZE;
5998 			break;
5999 		case CHIP_KAVERI:
6000 			size = KV_RLC_UCODE_SIZE;
6001 			break;
6002 		case CHIP_KABINI:
6003 			size = KB_RLC_UCODE_SIZE;
6004 			break;
6005 		case CHIP_MULLINS:
6006 			size = ML_RLC_UCODE_SIZE;
6007 			break;
6008 		}
6009 
6010 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6011 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6012 		for (i = 0; i < size; i++)
6013 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6014 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6015 	}
6016 
6017 	/* XXX - find out what chips support lbpw */
6018 	cik_enable_lbpw(rdev, false);
6019 
6020 	if (rdev->family == CHIP_BONAIRE)
6021 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6022 
6023 	cik_rlc_start(rdev);
6024 
6025 	return 0;
6026 }
6027 
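/* Coarse-grain clock gating (CGCG/CGLS).  The serdes write below
 * targets all CU and non-CU masters (0xffffffff masks) so the
 * override reaches every block before CGCG_EN/CGLS_EN are flipped
 * in RLC_CGCG_CGLS_CTRL.
 */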
6028 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6029 {
6030 	u32 data, orig, tmp, tmp2;
6031 
6032 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6033 
6034 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6035 		cik_enable_gui_idle_interrupt(rdev, true);
6036 
6037 		tmp = cik_halt_rlc(rdev);
6038 
6039 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6040 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6041 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6042 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6043 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6044 
6045 		cik_update_rlc(rdev, tmp);
6046 
6047 		data |= CGCG_EN | CGLS_EN;
6048 	} else {
6049 		cik_enable_gui_idle_interrupt(rdev, false);
6050 
6051 		RREG32(CB_CGTT_SCLK_CTRL);
6052 		RREG32(CB_CGTT_SCLK_CTRL);
6053 		RREG32(CB_CGTT_SCLK_CTRL);
6054 		RREG32(CB_CGTT_SCLK_CTRL);
6055 
6056 		data &= ~(CGCG_EN | CGLS_EN);
6057 	}
6058 
6059 	if (orig != data)
6060 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6061 
6062 }
6063 
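/* Medium-grain clock gating (MGCG), plus the related CP/RLC memory
 * light-sleep and CGTS shader-memory gating features; each piece is
 * gated on the corresponding rdev->cg_flags bit.
 */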
6064 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6065 {
6066 	u32 data, orig, tmp = 0;
6067 
6068 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6069 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6070 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6071 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6072 				data |= CP_MEM_LS_EN;
6073 				if (orig != data)
6074 					WREG32(CP_MEM_SLP_CNTL, data);
6075 			}
6076 		}
6077 
6078 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6079 		data |= 0x00000001;
6080 		data &= 0xfffffffd;
6081 		if (orig != data)
6082 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6083 
6084 		tmp = cik_halt_rlc(rdev);
6085 
6086 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6087 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6088 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6089 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6090 		WREG32(RLC_SERDES_WR_CTRL, data);
6091 
6092 		cik_update_rlc(rdev, tmp);
6093 
6094 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6095 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6096 			data &= ~SM_MODE_MASK;
6097 			data |= SM_MODE(0x2);
6098 			data |= SM_MODE_ENABLE;
6099 			data &= ~CGTS_OVERRIDE;
6100 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6101 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6102 				data &= ~CGTS_LS_OVERRIDE;
6103 			data &= ~ON_MONITOR_ADD_MASK;
6104 			data |= ON_MONITOR_ADD_EN;
6105 			data |= ON_MONITOR_ADD(0x96);
6106 			if (orig != data)
6107 				WREG32(CGTS_SM_CTRL_REG, data);
6108 		}
6109 	} else {
6110 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6111 		data |= 0x00000003;
6112 		if (orig != data)
6113 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6114 
6115 		data = RREG32(RLC_MEM_SLP_CNTL);
6116 		if (data & RLC_MEM_LS_EN) {
6117 			data &= ~RLC_MEM_LS_EN;
6118 			WREG32(RLC_MEM_SLP_CNTL, data);
6119 		}
6120 
6121 		data = RREG32(CP_MEM_SLP_CNTL);
6122 		if (data & CP_MEM_LS_EN) {
6123 			data &= ~CP_MEM_LS_EN;
6124 			WREG32(CP_MEM_SLP_CNTL, data);
6125 		}
6126 
6127 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6128 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6129 		if (orig != data)
6130 			WREG32(CGTS_SM_CTRL_REG, data);
6131 
6132 		tmp = cik_halt_rlc(rdev);
6133 
6134 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6135 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6136 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6137 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6138 		WREG32(RLC_SERDES_WR_CTRL, data);
6139 
6140 		cik_update_rlc(rdev, tmp);
6141 	}
6142 }
6143 
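/* MC hub/client registers that carry the MC_CG_ENABLE and
 * MC_LS_ENABLE bits; they are toggled as a group below.
 */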
6144 static const u32 mc_cg_registers[] =
6145 {
6146 	MC_HUB_MISC_HUB_CG,
6147 	MC_HUB_MISC_SIP_CG,
6148 	MC_HUB_MISC_VM_CG,
6149 	MC_XPB_CLK_GAT,
6150 	ATC_MISC_CG,
6151 	MC_CITF_MISC_WR_CG,
6152 	MC_CITF_MISC_RD_CG,
6153 	MC_CITF_MISC_VM_CG,
6154 	VM_L2_CG,
6155 };
6156 
6157 static void cik_enable_mc_ls(struct radeon_device *rdev,
6158 			     bool enable)
6159 {
6160 	int i;
6161 	u32 orig, data;
6162 
6163 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6164 		orig = data = RREG32(mc_cg_registers[i]);
6165 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6166 			data |= MC_LS_ENABLE;
6167 		else
6168 			data &= ~MC_LS_ENABLE;
6169 		if (data != orig)
6170 			WREG32(mc_cg_registers[i], data);
6171 	}
6172 }
6173 
6174 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6175 			       bool enable)
6176 {
6177 	int i;
6178 	u32 orig, data;
6179 
6180 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6181 		orig = data = RREG32(mc_cg_registers[i]);
6182 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6183 			data |= MC_CG_ENABLE;
6184 		else
6185 			data &= ~MC_CG_ENABLE;
6186 		if (data != orig)
6187 			WREG32(mc_cg_registers[i], data);
6188 	}
6189 }
6190 
6191 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6192 				 bool enable)
6193 {
6194 	u32 orig, data;
6195 
6196 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6197 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6198 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6199 	} else {
6200 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6201 		data |= 0xff000000;
6202 		if (data != orig)
6203 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6204 
6205 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6206 		data |= 0xff000000;
6207 		if (data != orig)
6208 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6209 	}
6210 }
6211 
6212 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6213 				 bool enable)
6214 {
6215 	u32 orig, data;
6216 
6217 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6218 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6219 		data |= 0x100;
6220 		if (orig != data)
6221 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6222 
6223 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6224 		data |= 0x100;
6225 		if (orig != data)
6226 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6227 	} else {
6228 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6229 		data &= ~0x100;
6230 		if (orig != data)
6231 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6232 
6233 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6234 		data &= ~0x100;
6235 		if (orig != data)
6236 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6237 	}
6238 }
6239 
6240 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6241 				bool enable)
6242 {
6243 	u32 orig, data;
6244 
6245 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6246 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6247 		data = 0xfff; /* note: this overwrites the value just read */
6248 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6249 
6250 		orig = data = RREG32(UVD_CGC_CTRL);
6251 		data |= DCM;
6252 		if (orig != data)
6253 			WREG32(UVD_CGC_CTRL, data);
6254 	} else {
6255 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6256 		data &= ~0xfff;
6257 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6258 
6259 		orig = data = RREG32(UVD_CGC_CTRL);
6260 		data &= ~DCM;
6261 		if (orig != data)
6262 			WREG32(UVD_CGC_CTRL, data);
6263 	}
6264 }
6265 
6266 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6267 			       bool enable)
6268 {
6269 	u32 orig, data;
6270 
6271 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6272 
6273 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6274 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6275 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6276 	else
6277 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6278 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6279 
6280 	if (orig != data)
6281 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6282 }
6283 
6284 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6285 				bool enable)
6286 {
6287 	u32 orig, data;
6288 
6289 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6290 
6291 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6292 		data &= ~CLOCK_GATING_DIS;
6293 	else
6294 		data |= CLOCK_GATING_DIS;
6295 
6296 	if (orig != data)
6297 		WREG32(HDP_HOST_PATH_CNTL, data);
6298 }
6299 
6300 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6301 			      bool enable)
6302 {
6303 	u32 orig, data;
6304 
6305 	orig = data = RREG32(HDP_MEM_POWER_LS);
6306 
6307 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6308 		data |= HDP_LS_ENABLE;
6309 	else
6310 		data &= ~HDP_LS_ENABLE;
6311 
6312 	if (orig != data)
6313 		WREG32(HDP_MEM_POWER_LS, data);
6314 }
6315 
6316 void cik_update_cg(struct radeon_device *rdev,
6317 		   u32 block, bool enable)
6318 {
6319 
6320 	if (block & RADEON_CG_BLOCK_GFX) {
6321 		cik_enable_gui_idle_interrupt(rdev, false);
6322 		/* order matters! */
6323 		if (enable) {
6324 			cik_enable_mgcg(rdev, true);
6325 			cik_enable_cgcg(rdev, true);
6326 		} else {
6327 			cik_enable_cgcg(rdev, false);
6328 			cik_enable_mgcg(rdev, false);
6329 		}
6330 		cik_enable_gui_idle_interrupt(rdev, true);
6331 	}
6332 
6333 	if (block & RADEON_CG_BLOCK_MC) {
6334 		if (!(rdev->flags & RADEON_IS_IGP)) {
6335 			cik_enable_mc_mgcg(rdev, enable);
6336 			cik_enable_mc_ls(rdev, enable);
6337 		}
6338 	}
6339 
6340 	if (block & RADEON_CG_BLOCK_SDMA) {
6341 		cik_enable_sdma_mgcg(rdev, enable);
6342 		cik_enable_sdma_mgls(rdev, enable);
6343 	}
6344 
6345 	if (block & RADEON_CG_BLOCK_BIF) {
6346 		cik_enable_bif_mgls(rdev, enable);
6347 	}
6348 
6349 	if (block & RADEON_CG_BLOCK_UVD) {
6350 		if (rdev->has_uvd)
6351 			cik_enable_uvd_mgcg(rdev, enable);
6352 	}
6353 
6354 	if (block & RADEON_CG_BLOCK_HDP) {
6355 		cik_enable_hdp_mgcg(rdev, enable);
6356 		cik_enable_hdp_ls(rdev, enable);
6357 	}
6358 
6359 	if (block & RADEON_CG_BLOCK_VCE) {
6360 		vce_v2_0_enable_mgcg(rdev, enable);
6361 	}
6362 }
6363 
6364 static void cik_init_cg(struct radeon_device *rdev)
6365 {
6366 
6367 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6368 
6369 	if (rdev->has_uvd)
6370 		si_init_uvd_internal_cg(rdev);
6371 
6372 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6373 			     RADEON_CG_BLOCK_SDMA |
6374 			     RADEON_CG_BLOCK_BIF |
6375 			     RADEON_CG_BLOCK_UVD |
6376 			     RADEON_CG_BLOCK_HDP), true);
6377 }
6378 
6379 static void cik_fini_cg(struct radeon_device *rdev)
6380 {
6381 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6382 			     RADEON_CG_BLOCK_SDMA |
6383 			     RADEON_CG_BLOCK_BIF |
6384 			     RADEON_CG_BLOCK_UVD |
6385 			     RADEON_CG_BLOCK_HDP), false);
6386 
6387 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6388 }
6389 
6390 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6391 					  bool enable)
6392 {
6393 	u32 data, orig;
6394 
6395 	orig = data = RREG32(RLC_PG_CNTL);
6396 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6397 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6398 	else
6399 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6400 	if (orig != data)
6401 		WREG32(RLC_PG_CNTL, data);
6402 }
6403 
6404 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6405 					  bool enable)
6406 {
6407 	u32 data, orig;
6408 
6409 	orig = data = RREG32(RLC_PG_CNTL);
6410 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6411 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6412 	else
6413 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6414 	if (orig != data)
6415 		WREG32(RLC_PG_CNTL, data);
6416 }
6417 
6418 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6419 {
6420 	u32 data, orig;
6421 
6422 	orig = data = RREG32(RLC_PG_CNTL);
6423 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6424 		data &= ~DISABLE_CP_PG;
6425 	else
6426 		data |= DISABLE_CP_PG;
6427 	if (orig != data)
6428 		WREG32(RLC_PG_CNTL, data);
6429 }
6430 
6431 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6432 {
6433 	u32 data, orig;
6434 
6435 	orig = data = RREG32(RLC_PG_CNTL);
6436 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6437 		data &= ~DISABLE_GDS_PG;
6438 	else
6439 		data |= DISABLE_GDS_PG;
6440 	if (orig != data)
6441 		WREG32(RLC_PG_CNTL, data);
6442 }
6443 
6444 #define CP_ME_TABLE_SIZE    96
6445 #define CP_ME_TABLE_OFFSET  2048
6446 #define CP_MEC_TABLE_OFFSET 4096
6447 
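/* Copy the per-ME jump tables out of the firmware images into the
 * RLC cp_table buffer so the CP state can be restored on power up.
 * me 0-4 map to CE, PFP, ME, MEC1 and, on Kaveri only, MEC2.
 */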
6448 void cik_init_cp_pg_table(struct radeon_device *rdev)
6449 {
6450 	volatile u32 *dst_ptr;
6451 	int me, i, max_me = 4;
6452 	u32 bo_offset = 0;
6453 	u32 table_offset, table_size;
6454 
6455 	if (rdev->family == CHIP_KAVERI)
6456 		max_me = 5;
6457 
6458 	if (rdev->rlc.cp_table_ptr == NULL)
6459 		return;
6460 
6461 	/* write the cp table buffer */
6462 	dst_ptr = rdev->rlc.cp_table_ptr;
6463 	for (me = 0; me < max_me; me++) {
6464 		if (rdev->new_fw) {
6465 			const __le32 *fw_data;
6466 			const struct gfx_firmware_header_v1_0 *hdr;
6467 
6468 			if (me == 0) {
6469 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6470 				fw_data = (const __le32 *)
6471 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6472 				table_offset = le32_to_cpu(hdr->jt_offset);
6473 				table_size = le32_to_cpu(hdr->jt_size);
6474 			} else if (me == 1) {
6475 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6476 				fw_data = (const __le32 *)
6477 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6478 				table_offset = le32_to_cpu(hdr->jt_offset);
6479 				table_size = le32_to_cpu(hdr->jt_size);
6480 			} else if (me == 2) {
6481 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6482 				fw_data = (const __le32 *)
6483 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6484 				table_offset = le32_to_cpu(hdr->jt_offset);
6485 				table_size = le32_to_cpu(hdr->jt_size);
6486 			} else if (me == 3) {
6487 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6488 				fw_data = (const __le32 *)
6489 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6490 				table_offset = le32_to_cpu(hdr->jt_offset);
6491 				table_size = le32_to_cpu(hdr->jt_size);
6492 			} else {
6493 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6494 				fw_data = (const __le32 *)
6495 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6496 				table_offset = le32_to_cpu(hdr->jt_offset);
6497 				table_size = le32_to_cpu(hdr->jt_size);
6498 			}
6499 
6500 			for (i = 0; i < table_size; i++) {
6501 				dst_ptr[bo_offset + i] =
6502 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6503 			}
6504 			bo_offset += table_size;
6505 		} else {
6506 			const __be32 *fw_data;
6507 			table_size = CP_ME_TABLE_SIZE;
6508 
6509 			if (me == 0) {
6510 				fw_data = (const __be32 *)rdev->ce_fw->data;
6511 				table_offset = CP_ME_TABLE_OFFSET;
6512 			} else if (me == 1) {
6513 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6514 				table_offset = CP_ME_TABLE_OFFSET;
6515 			} else if (me == 2) {
6516 				fw_data = (const __be32 *)rdev->me_fw->data;
6517 				table_offset = CP_ME_TABLE_OFFSET;
6518 			} else {
6519 				fw_data = (const __be32 *)rdev->mec_fw->data;
6520 				table_offset = CP_MEC_TABLE_OFFSET;
6521 			}
6522 
6523 			for (i = 0; i < table_size; i++) {
6524 				dst_ptr[bo_offset + i] =
6525 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6526 			}
6527 			bo_offset += table_size;
6528 		}
6529 	}
6530 }
6531 
6532 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6533 				bool enable)
6534 {
6535 	u32 data, orig;
6536 
6537 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6538 		orig = data = RREG32(RLC_PG_CNTL);
6539 		data |= GFX_PG_ENABLE;
6540 		if (orig != data)
6541 			WREG32(RLC_PG_CNTL, data);
6542 
6543 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6544 		data |= AUTO_PG_EN;
6545 		if (orig != data)
6546 			WREG32(RLC_AUTO_PG_CTRL, data);
6547 	} else {
6548 		orig = data = RREG32(RLC_PG_CNTL);
6549 		data &= ~GFX_PG_ENABLE;
6550 		if (orig != data)
6551 			WREG32(RLC_PG_CNTL, data);
6552 
6553 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6554 		data &= ~AUTO_PG_EN;
6555 		if (orig != data)
6556 			WREG32(RLC_AUTO_PG_CTRL, data);
6557 
6558 		data = RREG32(DB_RENDER_CONTROL);
6559 	}
6560 }
6561 
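/* Return a bitmap of the active (not disabled) CUs for the given
 * SE/SH, derived from the fused and user shader array configs.
 */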
6562 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6563 {
6564 	u32 mask = 0, tmp, tmp1;
6565 	int i;
6566 
6567 	cik_select_se_sh(rdev, se, sh);
6568 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6569 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6570 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6571 
6572 	tmp &= 0xffff0000;
6573 
6574 	tmp |= tmp1;
6575 	tmp >>= 16;
6576 
6577 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6578 		mask <<= 1;
6579 		mask |= 1;
6580 	}
6581 
6582 	return (~tmp) & mask;
6583 }
6584 
6585 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6586 {
6587 	u32 i, j, k, active_cu_number = 0;
6588 	u32 mask, counter, cu_bitmap;
6589 	u32 tmp = 0;
6590 
6591 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6592 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6593 			mask = 1;
6594 			cu_bitmap = 0;
6595 			counter = 0;
6596 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6597 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6598 					if (counter < 2)
6599 						cu_bitmap |= mask;
6600 					counter++;
6601 				}
6602 				mask <<= 1;
6603 			}
6604 
6605 			active_cu_number += counter;
6606 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6607 		}
6608 	}
6609 
6610 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6611 
6612 	tmp = RREG32(RLC_MAX_PG_CU);
6613 	tmp &= ~MAX_PU_CU_MASK;
6614 	tmp |= MAX_PU_CU(active_cu_number);
6615 	WREG32(RLC_MAX_PG_CU, tmp);
6616 }
6617 
6618 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6619 				       bool enable)
6620 {
6621 	u32 data, orig;
6622 
6623 	orig = data = RREG32(RLC_PG_CNTL);
6624 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6625 		data |= STATIC_PER_CU_PG_ENABLE;
6626 	else
6627 		data &= ~STATIC_PER_CU_PG_ENABLE;
6628 	if (orig != data)
6629 		WREG32(RLC_PG_CNTL, data);
6630 }
6631 
6632 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6633 					bool enable)
6634 {
6635 	u32 data, orig;
6636 
6637 	orig = data = RREG32(RLC_PG_CNTL);
6638 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6639 		data |= DYN_PER_CU_PG_ENABLE;
6640 	else
6641 		data &= ~DYN_PER_CU_PG_ENABLE;
6642 	if (orig != data)
6643 		WREG32(RLC_PG_CNTL, data);
6644 }
6645 
6646 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6647 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6648 
6649 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6650 {
6651 	u32 data, orig;
6652 	u32 i;
6653 
6654 	if (rdev->rlc.cs_data) {
6655 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6656 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6657 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6658 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6659 	} else {
6660 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6661 		for (i = 0; i < 3; i++)
6662 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6663 	}
6664 	if (rdev->rlc.reg_list) {
6665 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6666 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6667 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6668 	}
6669 
6670 	orig = data = RREG32(RLC_PG_CNTL);
6671 	data |= GFX_PG_SRC;
6672 	if (orig != data)
6673 		WREG32(RLC_PG_CNTL, data);
6674 
6675 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6676 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6677 
6678 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6679 	data &= ~IDLE_POLL_COUNT_MASK;
6680 	data |= IDLE_POLL_COUNT(0x60);
6681 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6682 
6683 	data = 0x10101010;
6684 	WREG32(RLC_PG_DELAY, data);
6685 
6686 	data = RREG32(RLC_PG_DELAY_2);
6687 	data &= ~0xff;
6688 	data |= 0x3;
6689 	WREG32(RLC_PG_DELAY_2, data);
6690 
6691 	data = RREG32(RLC_AUTO_PG_CTRL);
6692 	data &= ~GRBM_REG_SGIT_MASK;
6693 	data |= GRBM_REG_SGIT(0x700);
6694 	WREG32(RLC_AUTO_PG_CTRL, data);
6695 
6696 }
6697 
6698 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6699 {
6700 	cik_enable_gfx_cgpg(rdev, enable);
6701 	cik_enable_gfx_static_mgpg(rdev, enable);
6702 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6703 }
6704 
6705 u32 cik_get_csb_size(struct radeon_device *rdev)
6706 {
6707 	u32 count = 0;
6708 	const struct cs_section_def *sect = NULL;
6709 	const struct cs_extent_def *ext = NULL;
6710 
6711 	if (rdev->rlc.cs_data == NULL)
6712 		return 0;
6713 
6714 	/* begin clear state */
6715 	count += 2;
6716 	/* context control state */
6717 	count += 3;
6718 
6719 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6720 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6721 			if (sect->id == SECT_CONTEXT)
6722 				count += 2 + ext->reg_count;
6723 			else
6724 				return 0;
6725 		}
6726 	}
6727 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6728 	count += 4;
6729 	/* end clear state */
6730 	count += 2;
6731 	/* clear state */
6732 	count += 2;
6733 
6734 	return count;
6735 }
6736 
6737 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6738 {
6739 	u32 count = 0, i;
6740 	const struct cs_section_def *sect = NULL;
6741 	const struct cs_extent_def *ext = NULL;
6742 
6743 	if (rdev->rlc.cs_data == NULL)
6744 		return;
6745 	if (buffer == NULL)
6746 		return;
6747 
6748 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6749 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6750 
6751 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6752 	buffer[count++] = cpu_to_le32(0x80000000);
6753 	buffer[count++] = cpu_to_le32(0x80000000);
6754 
6755 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6756 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6757 			if (sect->id == SECT_CONTEXT) {
6758 				buffer[count++] =
6759 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6760 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6761 				for (i = 0; i < ext->reg_count; i++)
6762 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6763 			} else {
6764 				return;
6765 			}
6766 		}
6767 	}
6768 
6769 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6770 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6771 	switch (rdev->family) {
6772 	case CHIP_BONAIRE:
6773 		buffer[count++] = cpu_to_le32(0x16000012);
6774 		buffer[count++] = cpu_to_le32(0x00000000);
6775 		break;
6776 	case CHIP_KAVERI:
6777 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6778 		buffer[count++] = cpu_to_le32(0x00000000);
6779 		break;
6780 	case CHIP_KABINI:
6781 	case CHIP_MULLINS:
6782 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6783 		buffer[count++] = cpu_to_le32(0x00000000);
6784 		break;
6785 	case CHIP_HAWAII:
6786 		buffer[count++] = cpu_to_le32(0x3a00161a);
6787 		buffer[count++] = cpu_to_le32(0x0000002e);
6788 		break;
6789 	default:
6790 		buffer[count++] = cpu_to_le32(0x00000000);
6791 		buffer[count++] = cpu_to_le32(0x00000000);
6792 		break;
6793 	}
6794 
6795 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6796 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6797 
6798 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6799 	buffer[count++] = cpu_to_le32(0);
6800 }
6801 
6802 static void cik_init_pg(struct radeon_device *rdev)
6803 {
6804 	if (rdev->pg_flags) {
6805 		cik_enable_sck_slowdown_on_pu(rdev, true);
6806 		cik_enable_sck_slowdown_on_pd(rdev, true);
6807 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6808 			cik_init_gfx_cgpg(rdev);
6809 			cik_enable_cp_pg(rdev, true);
6810 			cik_enable_gds_pg(rdev, true);
6811 		}
6812 		cik_init_ao_cu_mask(rdev);
6813 		cik_update_gfx_pg(rdev, true);
6814 	}
6815 }
6816 
6817 static void cik_fini_pg(struct radeon_device *rdev)
6818 {
6819 	if (rdev->pg_flags) {
6820 		cik_update_gfx_pg(rdev, false);
6821 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6822 			cik_enable_cp_pg(rdev, false);
6823 			cik_enable_gds_pg(rdev, false);
6824 		}
6825 	}
6826 }
6827 
6828 /*
6829  * Interrupts
6830  * Starting with r6xx, interrupts are handled via a ring buffer.
6831  * Ring buffers are areas of GPU accessible memory that the GPU
6832  * writes interrupt vectors into and the host reads vectors out of.
6833  * There is a rptr (read pointer) that determines where the
6834  * host is currently reading, and a wptr (write pointer)
6835  * which determines where the GPU has written.  When the
6836  * pointers are equal, the ring is idle.  When the GPU
6837  * writes vectors to the ring buffer, it increments the
6838  * wptr.  When there is an interrupt, the host then starts
6839  * fetching vectors and processing them until the pointers are
6840  * equal again, at which point it updates the rptr.
6841  */
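
/*
 * Illustrative sketch of the host (consumer) side of this protocol.
 * It simply mirrors the main loop of cik_irq_process() below; the
 * process_vector() helper is hypothetical and stands in for the big
 * src_id switch there.  Entries are 16 bytes; rptr/wptr are byte
 * offsets into the ring:
 *
 *	while (rptr != wptr) {
 *		process_vector(&rdev->ih.ring[rptr / 4]);
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */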
6842 
6843 /**
6844  * cik_enable_interrupts - Enable the interrupt ring buffer
6845  *
6846  * @rdev: radeon_device pointer
6847  *
6848  * Enable the interrupt ring buffer (CIK).
6849  */
6850 static void cik_enable_interrupts(struct radeon_device *rdev)
6851 {
6852 	u32 ih_cntl = RREG32(IH_CNTL);
6853 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6854 
6855 	ih_cntl |= ENABLE_INTR;
6856 	ih_rb_cntl |= IH_RB_ENABLE;
6857 	WREG32(IH_CNTL, ih_cntl);
6858 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6859 	rdev->ih.enabled = true;
6860 }
6861 
6862 /**
6863  * cik_disable_interrupts - Disable the interrupt ring buffer
6864  *
6865  * @rdev: radeon_device pointer
6866  *
6867  * Disable the interrupt ring buffer (CIK).
6868  */
6869 static void cik_disable_interrupts(struct radeon_device *rdev)
6870 {
6871 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6872 	u32 ih_cntl = RREG32(IH_CNTL);
6873 
6874 	ih_rb_cntl &= ~IH_RB_ENABLE;
6875 	ih_cntl &= ~ENABLE_INTR;
6876 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6877 	WREG32(IH_CNTL, ih_cntl);
6878 	/* set rptr, wptr to 0 */
6879 	WREG32(IH_RB_RPTR, 0);
6880 	WREG32(IH_RB_WPTR, 0);
6881 	rdev->ih.enabled = false;
6882 	rdev->ih.rptr = 0;
6883 }
6884 
6885 /**
6886  * cik_disable_interrupt_state - Disable all interrupt sources
6887  *
6888  * @rdev: radeon_device pointer
6889  *
6890  * Clear all interrupt enable bits used by the driver (CIK).
6891  */
6892 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6893 {
6894 	u32 tmp;
6895 
6896 	/* gfx ring */
6897 	tmp = RREG32(CP_INT_CNTL_RING0) &
6898 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6899 	WREG32(CP_INT_CNTL_RING0, tmp);
6900 	/* sdma */
6901 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6902 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6903 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6904 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6905 	/* compute queues */
6906 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6907 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6908 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6909 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6910 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6911 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6912 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6913 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6914 	/* grbm */
6915 	WREG32(GRBM_INT_CNTL, 0);
6916 	/* SRBM */
6917 	WREG32(SRBM_INT_CNTL, 0);
6918 	/* vline/vblank, etc. */
6919 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6920 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6921 	if (rdev->num_crtc >= 4) {
6922 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6923 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6924 	}
6925 	if (rdev->num_crtc >= 6) {
6926 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6927 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6928 	}
6929 	/* pflip */
6930 	if (rdev->num_crtc >= 2) {
6931 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6932 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6933 	}
6934 	if (rdev->num_crtc >= 4) {
6935 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6936 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6937 	}
6938 	if (rdev->num_crtc >= 6) {
6939 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6940 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6941 	}
6942 
6943 	/* dac hotplug */
6944 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6945 
6946 	/* digital hotplug */
6947 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6948 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6949 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6950 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6951 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6952 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6953 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6954 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6955 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6956 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6957 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6958 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6959 
6960 }
6961 
6962 /**
6963  * cik_irq_init - init and enable the interrupt ring
6964  *
6965  * @rdev: radeon_device pointer
6966  *
6967  * Allocate a ring buffer for the interrupt controller,
6968  * enable the RLC, disable interrupts, enable the IH
6969  * ring buffer and enable it (CIK).
6970  * Called at device load and resume.
6971  * Returns 0 for success, errors for failure.
6972  */
6973 static int cik_irq_init(struct radeon_device *rdev)
6974 {
6975 	int ret = 0;
6976 	int rb_bufsz;
6977 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6978 
6979 	/* allocate ring */
6980 	ret = r600_ih_ring_alloc(rdev);
6981 	if (ret)
6982 		return ret;
6983 
6984 	/* disable irqs */
6985 	cik_disable_interrupts(rdev);
6986 
6987 	/* init rlc */
6988 	ret = cik_rlc_resume(rdev);
6989 	if (ret) {
6990 		r600_ih_ring_fini(rdev);
6991 		return ret;
6992 	}
6993 
6994 	/* setup interrupt control */
6995 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6996 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6997 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6998 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6999 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7000 	 */
7001 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7002 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7003 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7004 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7005 
7006 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7007 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
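	/* the RB_SIZE field of IH_RB_CNTL (set below) is log2 of the ring size in dwords */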
7008 
7009 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7010 		      IH_WPTR_OVERFLOW_CLEAR |
7011 		      (rb_bufsz << 1));
7012 
7013 	if (rdev->wb.enabled)
7014 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7015 
7016 	/* set the writeback address whether it's enabled or not */
7017 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7018 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7019 
7020 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7021 
7022 	/* set rptr, wptr to 0 */
7023 	WREG32(IH_RB_RPTR, 0);
7024 	WREG32(IH_RB_WPTR, 0);
7025 
7026 	/* Default settings for IH_CNTL (disabled at first) */
7027 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7028 	/* RPTR_REARM only works if msi's are enabled */
7029 	if (rdev->msi_enabled)
7030 		ih_cntl |= RPTR_REARM;
7031 	WREG32(IH_CNTL, ih_cntl);
7032 
7033 	/* force the active interrupt state to all disabled */
7034 	cik_disable_interrupt_state(rdev);
7035 
7036 	pci_set_master(rdev->pdev);
7037 
7038 	/* enable irqs */
7039 	cik_enable_interrupts(rdev);
7040 
7041 	return ret;
7042 }
7043 
7044 /**
7045  * cik_irq_set - enable/disable interrupt sources
7046  *
7047  * @rdev: radeon_device pointer
7048  *
7049  * Enable interrupt sources on the GPU (vblanks, hpd,
7050  * etc.) (CIK).
7051  * Returns 0 for success, errors for failure.
7052  */
7053 int cik_irq_set(struct radeon_device *rdev)
7054 {
7055 	u32 cp_int_cntl;
7056 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7057 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7058 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7059 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7060 	u32 grbm_int_cntl = 0;
7061 	u32 dma_cntl, dma_cntl1;
7062 
7063 	if (!rdev->irq.installed) {
7064 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7065 		return -EINVAL;
7066 	}
7067 	/* don't enable anything if the ih is disabled */
7068 	if (!rdev->ih.enabled) {
7069 		cik_disable_interrupts(rdev);
7070 		/* force the active interrupt state to all disabled */
7071 		cik_disable_interrupt_state(rdev);
7072 		return 0;
7073 	}
7074 
7075 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7076 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7077 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7078 
7079 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7080 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7081 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7082 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7083 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7084 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7085 
7086 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7087 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7088 
7089 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7090 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7091 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7092 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7093 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7094 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7095 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7096 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7097 
7098 	/* enable CP interrupts on all rings */
7099 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7100 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7101 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7102 	}
7103 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7104 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7105 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7106 		if (ring->me == 1) {
7107 			switch (ring->pipe) {
7108 			case 0:
7109 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7110 				break;
7111 			case 1:
7112 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7113 				break;
7114 			case 2:
7115 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7116 				break;
7117 			case 3:
7118 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7119 				break;
7120 			default:
7121 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7122 				break;
7123 			}
7124 		} else if (ring->me == 2) {
7125 			switch (ring->pipe) {
7126 			case 0:
7127 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7128 				break;
7129 			case 1:
7130 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7131 				break;
7132 			case 2:
7133 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7134 				break;
7135 			case 3:
7136 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7137 				break;
7138 			default:
7139 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7140 				break;
7141 			}
7142 		} else {
7143 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7144 		}
7145 	}
7146 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7147 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7148 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7149 		if (ring->me == 1) {
7150 			switch (ring->pipe) {
7151 			case 0:
7152 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7153 				break;
7154 			case 1:
7155 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7156 				break;
7157 			case 2:
7158 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7159 				break;
7160 			case 3:
7161 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7162 				break;
7163 			default:
7164 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7165 				break;
7166 			}
7167 		} else if (ring->me == 2) {
7168 			switch (ring->pipe) {
7169 			case 0:
7170 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7171 				break;
7172 			case 1:
7173 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7174 				break;
7175 			case 2:
7176 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7177 				break;
7178 			case 3:
7179 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7180 				break;
7181 			default:
7182 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7183 				break;
7184 			}
7185 		} else {
7186 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7187 		}
7188 	}
7189 
7190 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7191 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7192 		dma_cntl |= TRAP_ENABLE;
7193 	}
7194 
7195 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7196 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7197 		dma_cntl1 |= TRAP_ENABLE;
7198 	}
7199 
7200 	if (rdev->irq.crtc_vblank_int[0] ||
7201 	    atomic_read(&rdev->irq.pflip[0])) {
7202 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7203 		crtc1 |= VBLANK_INTERRUPT_MASK;
7204 	}
7205 	if (rdev->irq.crtc_vblank_int[1] ||
7206 	    atomic_read(&rdev->irq.pflip[1])) {
7207 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7208 		crtc2 |= VBLANK_INTERRUPT_MASK;
7209 	}
7210 	if (rdev->irq.crtc_vblank_int[2] ||
7211 	    atomic_read(&rdev->irq.pflip[2])) {
7212 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7213 		crtc3 |= VBLANK_INTERRUPT_MASK;
7214 	}
7215 	if (rdev->irq.crtc_vblank_int[3] ||
7216 	    atomic_read(&rdev->irq.pflip[3])) {
7217 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7218 		crtc4 |= VBLANK_INTERRUPT_MASK;
7219 	}
7220 	if (rdev->irq.crtc_vblank_int[4] ||
7221 	    atomic_read(&rdev->irq.pflip[4])) {
7222 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7223 		crtc5 |= VBLANK_INTERRUPT_MASK;
7224 	}
7225 	if (rdev->irq.crtc_vblank_int[5] ||
7226 	    atomic_read(&rdev->irq.pflip[5])) {
7227 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7228 		crtc6 |= VBLANK_INTERRUPT_MASK;
7229 	}
7230 	if (rdev->irq.hpd[0]) {
7231 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7232 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7233 	}
7234 	if (rdev->irq.hpd[1]) {
7235 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7236 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7237 	}
7238 	if (rdev->irq.hpd[2]) {
7239 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7240 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7241 	}
7242 	if (rdev->irq.hpd[3]) {
7243 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7244 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7245 	}
7246 	if (rdev->irq.hpd[4]) {
7247 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7248 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7249 	}
7250 	if (rdev->irq.hpd[5]) {
7251 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7252 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7253 	}
7254 
7255 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7256 
7257 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7258 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7259 
7260 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7261 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7262 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7263 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7264 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7265 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7266 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7267 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7268 
7269 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7270 
7271 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7272 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7273 	if (rdev->num_crtc >= 4) {
7274 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7275 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7276 	}
7277 	if (rdev->num_crtc >= 6) {
7278 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7279 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7280 	}
7281 
7282 	if (rdev->num_crtc >= 2) {
7283 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7284 		       GRPH_PFLIP_INT_MASK);
7285 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7286 		       GRPH_PFLIP_INT_MASK);
7287 	}
7288 	if (rdev->num_crtc >= 4) {
7289 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7290 		       GRPH_PFLIP_INT_MASK);
7291 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7292 		       GRPH_PFLIP_INT_MASK);
7293 	}
7294 	if (rdev->num_crtc >= 6) {
7295 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7296 		       GRPH_PFLIP_INT_MASK);
7297 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7298 		       GRPH_PFLIP_INT_MASK);
7299 	}
7300 
7301 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7302 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7303 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7304 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7305 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7306 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7307 
7308 	/* posting read */
7309 	RREG32(SRBM_STATUS);
7310 
7311 	return 0;
7312 }
7313 
7314 /**
7315  * cik_irq_ack - ack interrupt sources
7316  *
7317  * @rdev: radeon_device pointer
7318  *
7319  * Ack interrupt sources on the GPU (vblanks, hpd,
7320  * etc.) (CIK).  Certain interrupt sources are sw
7321  * generated and do not require an explicit ack.
7322  */
7323 static inline void cik_irq_ack(struct radeon_device *rdev)
7324 {
7325 	u32 tmp;
7326 
7327 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7328 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7329 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7330 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7331 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7332 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7333 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7334 
7335 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7336 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7337 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7338 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7339 	if (rdev->num_crtc >= 4) {
7340 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7341 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7342 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7343 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7344 	}
7345 	if (rdev->num_crtc >= 6) {
7346 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7347 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7348 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7349 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7350 	}
7351 
7352 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7353 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7354 		       GRPH_PFLIP_INT_CLEAR);
7355 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7356 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7357 		       GRPH_PFLIP_INT_CLEAR);
7358 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7359 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7360 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7361 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7362 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7363 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7364 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7365 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7366 
7367 	if (rdev->num_crtc >= 4) {
7368 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7369 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7370 			       GRPH_PFLIP_INT_CLEAR);
7371 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7372 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7373 			       GRPH_PFLIP_INT_CLEAR);
7374 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7375 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7376 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7377 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7378 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7379 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7380 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7381 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7382 	}
7383 
7384 	if (rdev->num_crtc >= 6) {
7385 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7386 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7387 			       GRPH_PFLIP_INT_CLEAR);
7388 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7389 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7390 			       GRPH_PFLIP_INT_CLEAR);
7391 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7392 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7393 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7394 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7395 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7396 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7397 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7398 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7399 	}
7400 
7401 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7402 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7403 		tmp |= DC_HPDx_INT_ACK;
7404 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7405 	}
7406 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7407 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7408 		tmp |= DC_HPDx_INT_ACK;
7409 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7410 	}
7411 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7412 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7413 		tmp |= DC_HPDx_INT_ACK;
7414 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7415 	}
7416 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7417 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7418 		tmp |= DC_HPDx_INT_ACK;
7419 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7420 	}
7421 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7422 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7423 		tmp |= DC_HPDx_INT_ACK;
7424 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7425 	}
7426 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7427 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7428 		tmp |= DC_HPDx_INT_ACK;
7429 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7430 	}
7431 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7432 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7433 		tmp |= DC_HPDx_RX_INT_ACK;
7434 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7435 	}
7436 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7437 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7438 		tmp |= DC_HPDx_RX_INT_ACK;
7439 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7440 	}
7441 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7442 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7443 		tmp |= DC_HPDx_RX_INT_ACK;
7444 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7445 	}
7446 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7447 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7448 		tmp |= DC_HPDx_RX_INT_ACK;
7449 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7450 	}
7451 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7452 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7453 		tmp |= DC_HPDx_RX_INT_ACK;
7454 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7455 	}
7456 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7457 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7458 		tmp |= DC_HPDx_RX_INT_ACK;
7459 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7460 	}
7461 }
7462 
7463 /**
7464  * cik_irq_disable - disable interrupts
7465  *
7466  * @rdev: radeon_device pointer
7467  *
7468  * Disable interrupts on the hw (CIK).
7469  */
7470 static void cik_irq_disable(struct radeon_device *rdev)
7471 {
7472 	cik_disable_interrupts(rdev);
7473 	/* Wait and acknowledge irq */
7474 	mdelay(1);
7475 	cik_irq_ack(rdev);
7476 	cik_disable_interrupt_state(rdev);
7477 }
7478 
7479 /**
7480  * cik_irq_suspend - disable interrupts for suspend
7481  *
7482  * @rdev: radeon_device pointer
7483  *
7484  * Disable interrupts and stop the RLC (CIK).
7485  * Used for suspend.
7486  */
7487 static void cik_irq_suspend(struct radeon_device *rdev)
7488 {
7489 	cik_irq_disable(rdev);
7490 	cik_rlc_stop(rdev);
7491 }
7492 
7493 /**
7494  * cik_irq_fini - tear down interrupt support
7495  *
7496  * @rdev: radeon_device pointer
7497  *
7498  * Disable interrupts on the hw and free the IH ring
7499  * buffer (CIK).
7500  * Used for driver unload.
7501  */
7502 static void cik_irq_fini(struct radeon_device *rdev)
7503 {
7504 	cik_irq_suspend(rdev);
7505 	r600_ih_ring_fini(rdev);
7506 }
7507 
7508 /**
7509  * cik_get_ih_wptr - get the IH ring buffer wptr
7510  *
7511  * @rdev: radeon_device pointer
7512  *
7513  * Get the IH ring buffer wptr from either the register
7514  * or the writeback memory buffer (CIK).  Also check for
7515  * ring buffer overflow and deal with it.
7516  * Used by cik_irq_process().
7517  * Returns the value of the wptr.
7518  */
7519 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7520 {
7521 	u32 wptr, tmp;
7522 
7523 	if (rdev->wb.enabled)
7524 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7525 	else
7526 		wptr = RREG32(IH_RB_WPTR);
7527 
7528 	if (wptr & RB_OVERFLOW) {
7529 		wptr &= ~RB_OVERFLOW;
7530 		/* When a ring buffer overflow happens, start parsing interrupts
7531 		 * from the last vector that was not overwritten (wptr + 16).
7532 		 * Hopefully this allows us to catch up.
7533 		 */
7534 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7535 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7536 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7537 		tmp = RREG32(IH_RB_CNTL);
7538 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7539 		WREG32(IH_RB_CNTL, tmp);
7540 	}
7541 	return (wptr & rdev->ih.ptr_mask);
7542 }
7543 
7544 /* CIK IV Ring
7545  * Each IV ring entry is 128 bits:
7546  * [7:0]    - interrupt source id
7547  * [31:8]   - reserved
7548  * [59:32]  - interrupt source data
7549  * [63:60]  - reserved
7550  * [71:64]  - RINGID
7551  *            CP:
7552  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7553  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7554  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7555  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7556  *            PIPE_ID - ME0 0=3D
7557  *                    - ME1&2 compute dispatcher (4 pipes each)
7558  *            SDMA:
7559  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7560  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7561  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7562  * [79:72]  - VMID
7563  * [95:80]  - PASID
7564  * [127:96] - reserved
7565  */
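
/*
 * Illustrative decode of a single IV entry, matching what
 * cik_irq_process() below actually does (dw[] being the entry
 * viewed as four little-endian dwords):
 *
 *	src_id   = le32_to_cpu(dw[0]) & 0xff;         bits [7:0]
 *	src_data = le32_to_cpu(dw[1]) & 0xfffffff;    bits [59:32]
 *	ring_id  = le32_to_cpu(dw[2]) & 0xff;         bits [71:64]
 *
 * and, for CP sources, the RINGID byte splits as:
 *
 *	me_id    = (ring_id & 0x60) >> 5;
 *	pipe_id  = (ring_id & 0x18) >> 3;
 *	queue_id = (ring_id & 0x7) >> 0;
 */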
7566 /**
7567  * cik_irq_process - interrupt handler
7568  *
7569  * @rdev: radeon_device pointer
7570  *
7571  * Interrupt handler (CIK).  Walk the IH ring,
7572  * ack interrupts and schedule work to handle
7573  * interrupt events.
7574  * Returns irq process return code.
7575  */
7576 int cik_irq_process(struct radeon_device *rdev)
7577 {
7578 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7579 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7580 	u32 wptr;
7581 	u32 rptr;
7582 	u32 src_id, src_data, ring_id;
7583 	u8 me_id, pipe_id, queue_id;
7584 	u32 ring_index;
7585 	bool queue_hotplug = false;
7586 	bool queue_dp = false;
7587 	bool queue_reset = false;
7588 	u32 addr, status, mc_client;
7589 	bool queue_thermal = false;
7590 
7591 	if (!rdev->ih.enabled || rdev->shutdown)
7592 		return IRQ_NONE;
7593 
7594 	wptr = cik_get_ih_wptr(rdev);
7595 
7596 restart_ih:
7597 	/* is somebody else already processing irqs? */
7598 	if (atomic_xchg(&rdev->ih.lock, 1))
7599 		return IRQ_NONE;
7600 
7601 	rptr = rdev->ih.rptr;
7602 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7603 
7604 	/* Order reading of wptr vs. reading of IH ring data */
7605 	rmb();
7606 
7607 	/* display interrupts */
7608 	cik_irq_ack(rdev);
7609 
7610 	while (rptr != wptr) {
7611 		/* wptr/rptr are in bytes! */
7612 		ring_index = rptr / 4;
7613 
7614 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7615 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7616 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7617 
7618 		switch (src_id) {
7619 		case 1: /* D1 vblank/vline */
7620 			switch (src_data) {
7621 			case 0: /* D1 vblank */
7622 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7623 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7624 
7625 				if (rdev->irq.crtc_vblank_int[0]) {
7626 					drm_handle_vblank(rdev->ddev, 0);
7627 					rdev->pm.vblank_sync = true;
7628 					wake_up(&rdev->irq.vblank_queue);
7629 				}
7630 				if (atomic_read(&rdev->irq.pflip[0]))
7631 					radeon_crtc_handle_vblank(rdev, 0);
7632 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7633 				DRM_DEBUG("IH: D1 vblank\n");
7634 
7635 				break;
7636 			case 1: /* D1 vline */
7637 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7638 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7639 
7640 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7641 				DRM_DEBUG("IH: D1 vline\n");
7642 
7643 				break;
7644 			default:
7645 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7646 				break;
7647 			}
7648 			break;
7649 		case 2: /* D2 vblank/vline */
7650 			switch (src_data) {
7651 			case 0: /* D2 vblank */
7652 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7653 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7654 
7655 				if (rdev->irq.crtc_vblank_int[1]) {
7656 					drm_handle_vblank(rdev->ddev, 1);
7657 					rdev->pm.vblank_sync = true;
7658 					wake_up(&rdev->irq.vblank_queue);
7659 				}
7660 				if (atomic_read(&rdev->irq.pflip[1]))
7661 					radeon_crtc_handle_vblank(rdev, 1);
7662 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7663 				DRM_DEBUG("IH: D2 vblank\n");
7664 
7665 				break;
7666 			case 1: /* D2 vline */
7667 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7668 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7669 
7670 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7671 				DRM_DEBUG("IH: D2 vline\n");
7672 
7673 				break;
7674 			default:
7675 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7676 				break;
7677 			}
7678 			break;
7679 		case 3: /* D3 vblank/vline */
7680 			switch (src_data) {
7681 			case 0: /* D3 vblank */
7682 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7683 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7684 
7685 				if (rdev->irq.crtc_vblank_int[2]) {
7686 					drm_handle_vblank(rdev->ddev, 2);
7687 					rdev->pm.vblank_sync = true;
7688 					wake_up(&rdev->irq.vblank_queue);
7689 				}
7690 				if (atomic_read(&rdev->irq.pflip[2]))
7691 					radeon_crtc_handle_vblank(rdev, 2);
7692 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7693 				DRM_DEBUG("IH: D3 vblank\n");
7694 
7695 				break;
7696 			case 1: /* D3 vline */
7697 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7698 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7699 
7700 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7701 				DRM_DEBUG("IH: D3 vline\n");
7702 
7703 				break;
7704 			default:
7705 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7706 				break;
7707 			}
7708 			break;
7709 		case 4: /* D4 vblank/vline */
7710 			switch (src_data) {
7711 			case 0: /* D4 vblank */
7712 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7713 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7714 
7715 				if (rdev->irq.crtc_vblank_int[3]) {
7716 					drm_handle_vblank(rdev->ddev, 3);
7717 					rdev->pm.vblank_sync = true;
7718 					wake_up(&rdev->irq.vblank_queue);
7719 				}
7720 				if (atomic_read(&rdev->irq.pflip[3]))
7721 					radeon_crtc_handle_vblank(rdev, 3);
7722 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7723 				DRM_DEBUG("IH: D4 vblank\n");
7724 
7725 				break;
7726 			case 1: /* D4 vline */
7727 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7728 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7729 
7730 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7731 				DRM_DEBUG("IH: D4 vline\n");
7732 
7733 				break;
7734 			default:
7735 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7736 				break;
7737 			}
7738 			break;
7739 		case 5: /* D5 vblank/vline */
7740 			switch (src_data) {
7741 			case 0: /* D5 vblank */
7742 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7743 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7744 
7745 				if (rdev->irq.crtc_vblank_int[4]) {
7746 					drm_handle_vblank(rdev->ddev, 4);
7747 					rdev->pm.vblank_sync = true;
7748 					wake_up(&rdev->irq.vblank_queue);
7749 				}
7750 				if (atomic_read(&rdev->irq.pflip[4]))
7751 					radeon_crtc_handle_vblank(rdev, 4);
7752 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7753 				DRM_DEBUG("IH: D5 vblank\n");
7754 
7755 				break;
7756 			case 1: /* D5 vline */
7757 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7758 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7759 
7760 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7761 				DRM_DEBUG("IH: D5 vline\n");
7762 
7763 				break;
7764 			default:
7765 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7766 				break;
7767 			}
7768 			break;
7769 		case 6: /* D6 vblank/vline */
7770 			switch (src_data) {
7771 			case 0: /* D6 vblank */
7772 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7773 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7774 
7775 				if (rdev->irq.crtc_vblank_int[5]) {
7776 					drm_handle_vblank(rdev->ddev, 5);
7777 					rdev->pm.vblank_sync = true;
7778 					wake_up(&rdev->irq.vblank_queue);
7779 				}
7780 				if (atomic_read(&rdev->irq.pflip[5]))
7781 					radeon_crtc_handle_vblank(rdev, 5);
7782 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7783 				DRM_DEBUG("IH: D6 vblank\n");
7784 
7785 				break;
7786 			case 1: /* D6 vline */
7787 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7788 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7789 
7790 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7791 				DRM_DEBUG("IH: D6 vline\n");
7792 
7793 				break;
7794 			default:
7795 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7796 				break;
7797 			}
7798 			break;
7799 		case 8: /* D1 page flip */
7800 		case 10: /* D2 page flip */
7801 		case 12: /* D3 page flip */
7802 		case 14: /* D4 page flip */
7803 		case 16: /* D5 page flip */
7804 		case 18: /* D6 page flip */
7805 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7806 			if (radeon_use_pflipirq > 0)
7807 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7808 			break;
7809 		case 42: /* HPD hotplug */
7810 			switch (src_data) {
7811 			case 0:
7812 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7813 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7814 
7815 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7816 				queue_hotplug = true;
7817 				DRM_DEBUG("IH: HPD1\n");
7818 
7819 				break;
7820 			case 1:
7821 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7822 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7823 
7824 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7825 				queue_hotplug = true;
7826 				DRM_DEBUG("IH: HPD2\n");
7827 
7828 				break;
7829 			case 2:
7830 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7831 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7832 
7833 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7834 				queue_hotplug = true;
7835 				DRM_DEBUG("IH: HPD3\n");
7836 
7837 				break;
7838 			case 3:
7839 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7840 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7841 
7842 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7843 				queue_hotplug = true;
7844 				DRM_DEBUG("IH: HPD4\n");
7845 
7846 				break;
7847 			case 4:
7848 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7849 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7850 
7851 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7852 				queue_hotplug = true;
7853 				DRM_DEBUG("IH: HPD5\n");
7854 
7855 				break;
7856 			case 5:
7857 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7858 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7859 
7860 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7861 				queue_hotplug = true;
7862 				DRM_DEBUG("IH: HPD6\n");
7863 
7864 				break;
7865 			case 6:
7866 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7867 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7868 
7869 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7870 				queue_dp = true;
7871 				DRM_DEBUG("IH: HPD_RX 1\n");
7872 
7873 				break;
7874 			case 7:
7875 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7876 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7877 
7878 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7879 				queue_dp = true;
7880 				DRM_DEBUG("IH: HPD_RX 2\n");
7881 
7882 				break;
7883 			case 8:
7884 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7885 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7886 
7887 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7888 				queue_dp = true;
7889 				DRM_DEBUG("IH: HPD_RX 3\n");
7890 
7891 				break;
7892 			case 9:
7893 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7894 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7895 
7896 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7897 				queue_dp = true;
7898 				DRM_DEBUG("IH: HPD_RX 4\n");
7899 
7900 				break;
7901 			case 10:
7902 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7903 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7904 
7905 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7906 				queue_dp = true;
7907 				DRM_DEBUG("IH: HPD_RX 5\n");
7908 
7909 				break;
7910 			case 11:
7911 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7912 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7913 
7914 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7915 				queue_dp = true;
7916 				DRM_DEBUG("IH: HPD_RX 6\n");
7917 
7918 				break;
7919 			default:
7920 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7921 				break;
7922 			}
7923 			break;
7924 		case 96:
7925 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7926 			WREG32(SRBM_INT_ACK, 0x1);
7927 			break;
7928 		case 124: /* UVD */
7929 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7930 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7931 			break;
7932 		case 146:
7933 		case 147:
7934 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7935 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7936 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7937 			/* reset addr and status */
7938 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7939 			if (addr == 0x0 && status == 0x0)
7940 				break;
7941 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7942 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7943 				addr);
7944 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7945 				status);
7946 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7947 			break;
7948 		case 167: /* VCE */
7949 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7950 			switch (src_data) {
7951 			case 0:
7952 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7953 				break;
7954 			case 1:
7955 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7956 				break;
7957 			default:
7958 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7959 				break;
7960 			}
7961 			break;
7962 		case 176: /* GFX RB CP_INT */
7963 		case 177: /* GFX IB CP_INT */
7964 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7965 			break;
7966 		case 181: /* CP EOP event */
7967 			DRM_DEBUG("IH: CP EOP\n");
7968 			/* XXX check the bitfield order! */
7969 			me_id = (ring_id & 0x60) >> 5;
7970 			pipe_id = (ring_id & 0x18) >> 3;
7971 			queue_id = (ring_id & 0x7) >> 0;
7972 			switch (me_id) {
7973 			case 0:
7974 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7975 				break;
7976 			case 1:
7977 			case 2:
7978 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7979 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7980 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7981 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7982 				break;
7983 			}
7984 			break;
7985 		case 184: /* CP Privileged reg access */
7986 			DRM_ERROR("Illegal register access in command stream\n");
7987 			/* XXX check the bitfield order! */
7988 			me_id = (ring_id & 0x60) >> 5;
7989 			pipe_id = (ring_id & 0x18) >> 3;
7990 			queue_id = (ring_id & 0x7) >> 0;
7991 			switch (me_id) {
7992 			case 0:
7993 				/* This results in a full GPU reset, but all we need to do is soft
7994 				 * reset the CP for gfx
7995 				 */
7996 				queue_reset = true;
7997 				break;
7998 			case 1:
7999 				/* XXX compute */
8000 				queue_reset = true;
8001 				break;
8002 			case 2:
8003 				/* XXX compute */
8004 				queue_reset = true;
8005 				break;
8006 			}
8007 			break;
8008 		case 185: /* CP Privileged inst */
8009 			DRM_ERROR("Illegal instruction in command stream\n");
8010 			/* XXX check the bitfield order! */
8011 			me_id = (ring_id & 0x60) >> 5;
8012 			pipe_id = (ring_id & 0x18) >> 3;
8013 			queue_id = (ring_id & 0x7) >> 0;
8014 			switch (me_id) {
8015 			case 0:
8016 				/* This results in a full GPU reset, but all we need to do is soft
8017 				 * reset the CP for gfx
8018 				 */
8019 				queue_reset = true;
8020 				break;
8021 			case 1:
8022 				/* XXX compute */
8023 				queue_reset = true;
8024 				break;
8025 			case 2:
8026 				/* XXX compute */
8027 				queue_reset = true;
8028 				break;
8029 			}
8030 			break;
8031 		case 224: /* SDMA trap event */
8032 			/* XXX check the bitfield order! */
8033 			me_id = (ring_id & 0x3) >> 0;
8034 			queue_id = (ring_id & 0xc) >> 2;
8035 			DRM_DEBUG("IH: SDMA trap\n");
8036 			switch (me_id) {
8037 			case 0:
8038 				switch (queue_id) {
8039 				case 0:
8040 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8041 					break;
8042 				case 1:
8043 					/* XXX compute */
8044 					break;
8045 				case 2:
8046 					/* XXX compute */
8047 					break;
8048 				}
8049 				break;
8050 			case 1:
8051 				switch (queue_id) {
8052 				case 0:
8053 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8054 					break;
8055 				case 1:
8056 					/* XXX compute */
8057 					break;
8058 				case 2:
8059 					/* XXX compute */
8060 					break;
8061 				}
8062 				break;
8063 			}
8064 			break;
8065 		case 230: /* thermal low to high */
8066 			DRM_DEBUG("IH: thermal low to high\n");
8067 			rdev->pm.dpm.thermal.high_to_low = false;
8068 			queue_thermal = true;
8069 			break;
8070 		case 231: /* thermal high to low */
8071 			DRM_DEBUG("IH: thermal high to low\n");
8072 			rdev->pm.dpm.thermal.high_to_low = true;
8073 			queue_thermal = true;
8074 			break;
8075 		case 233: /* GUI IDLE */
8076 			DRM_DEBUG("IH: GUI idle\n");
8077 			break;
8078 		case 241: /* SDMA Privileged inst */
8079 		case 247: /* SDMA Privileged inst */
8080 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8081 			/* XXX check the bitfield order! */
8082 			me_id = (ring_id & 0x3) >> 0;
8083 			queue_id = (ring_id & 0xc) >> 2;
8084 			switch (me_id) {
8085 			case 0:
8086 				switch (queue_id) {
8087 				case 0:
8088 					queue_reset = true;
8089 					break;
8090 				case 1:
8091 					/* XXX compute */
8092 					queue_reset = true;
8093 					break;
8094 				case 2:
8095 					/* XXX compute */
8096 					queue_reset = true;
8097 					break;
8098 				}
8099 				break;
8100 			case 1:
8101 				switch (queue_id) {
8102 				case 0:
8103 					queue_reset = true;
8104 					break;
8105 				case 1:
8106 					/* XXX compute */
8107 					queue_reset = true;
8108 					break;
8109 				case 2:
8110 					/* XXX compute */
8111 					queue_reset = true;
8112 					break;
8113 				}
8114 				break;
8115 			}
8116 			break;
8117 		default:
8118 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8119 			break;
8120 		}
8121 
8122 		/* wptr/rptr are in bytes! */
8123 		rptr += 16;
8124 		rptr &= rdev->ih.ptr_mask;
8125 		WREG32(IH_RB_RPTR, rptr);
8126 	}
8127 	if (queue_dp)
8128 		schedule_work(&rdev->dp_work);
8129 	if (queue_hotplug)
8130 		schedule_delayed_work(&rdev->hotplug_work, 0);
8131 	if (queue_reset) {
8132 		rdev->needs_reset = true;
8133 		wake_up_all(&rdev->fence_queue);
8134 	}
8135 	if (queue_thermal)
8136 		schedule_work(&rdev->pm.dpm.thermal.work);
8137 	rdev->ih.rptr = rptr;
8138 	atomic_set(&rdev->ih.lock, 0);
8139 
8140 	/* make sure wptr hasn't changed while processing */
8141 	wptr = cik_get_ih_wptr(rdev);
8142 	if (wptr != rptr)
8143 		goto restart_ih;
8144 
8145 	return IRQ_HANDLED;
8146 }
8147 
8148 /*
8149  * startup/shutdown callbacks
8150  */
8151 static void cik_uvd_init(struct radeon_device *rdev)
8152 {
8153 	int r;
8154 
8155 	if (!rdev->has_uvd)
8156 		return;
8157 
8158 	r = radeon_uvd_init(rdev);
8159 	if (r) {
8160 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8161 		/*
8162 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
8163 		 * cik_uvd_start() fail early, so nothing happens there.
8164 		 * Going through that code would be pointless, hence we
8165 		 * disable UVD here.
8166 		 */
8167 		rdev->has_uvd = 0;
8168 		return;
8169 	}
8170 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8171 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8172 }
8173 
8174 static void cik_uvd_start(struct radeon_device *rdev)
8175 {
8176 	int r;
8177 
8178 	if (!rdev->has_uvd)
8179 		return;
8180 
8181 	r = radeon_uvd_resume(rdev);
8182 	if (r) {
8183 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8184 		goto error;
8185 	}
8186 	r = uvd_v4_2_resume(rdev);
8187 	if (r) {
8188 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8189 		goto error;
8190 	}
8191 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8192 	if (r) {
8193 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8194 		goto error;
8195 	}
8196 	return;
8197 
8198 error:
8199 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8200 }
8201 
8202 static void cik_uvd_resume(struct radeon_device *rdev)
8203 {
8204 	struct radeon_ring *ring;
8205 	int r;
8206 
8207 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8208 		return;
8209 
8210 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8211 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8212 	if (r) {
8213 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8214 		return;
8215 	}
8216 	r = uvd_v1_0_init(rdev);
8217 	if (r) {
8218 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8219 		return;
8220 	}
8221 }
8222 
8223 static void cik_vce_init(struct radeon_device *rdev)
8224 {
8225 	int r;
8226 
8227 	if (!rdev->has_vce)
8228 		return;
8229 
8230 	r = radeon_vce_init(rdev);
8231 	if (r) {
8232 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8233 		/*
8234 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
8235 		 * cik_vce_start() fail early, so nothing happens there.
8236 		 * Going through that code would be pointless, hence we
8237 		 * disable VCE here.
8238 		 */
8239 		rdev->has_vce = 0;
8240 		return;
8241 	}
8242 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8243 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8244 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8245 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8246 }
8247 
8248 static void cik_vce_start(struct radeon_device *rdev)
8249 {
8250 	int r;
8251 
8252 	if (!rdev->has_vce)
8253 		return;
8254 
8255 	r = radeon_vce_resume(rdev);
8256 	if (r) {
8257 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8258 		goto error;
8259 	}
8260 	r = vce_v2_0_resume(rdev);
8261 	if (r) {
8262 		dev_err(rdev->dev, "failed VCE 2.0 resume (%d).\n", r);
8263 		goto error;
8264 	}
8265 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8266 	if (r) {
8267 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8268 		goto error;
8269 	}
8270 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8271 	if (r) {
8272 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8273 		goto error;
8274 	}
8275 	return;
8276 
8277 error:
8278 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8279 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8280 }
8281 
8282 static void cik_vce_resume(struct radeon_device *rdev)
8283 {
8284 	struct radeon_ring *ring;
8285 	int r;
8286 
8287 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8288 		return;
8289 
8290 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8291 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8292 	if (r) {
8293 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8294 		return;
8295 	}
8296 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8297 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8298 	if (r) {
8299 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
8300 		return;
8301 	}
8302 	r = vce_v1_0_init(rdev);
8303 	if (r) {
8304 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8305 		return;
8306 	}
8307 }
8308 
8309 /**
8310  * cik_startup - program the asic to a functional state
8311  *
8312  * @rdev: radeon_device pointer
8313  *
8314  * Programs the asic to a functional state (CIK).
8315  * Called by cik_init() and cik_resume().
8316  * Returns 0 for success, error for failure.
8317  */
8318 static int cik_startup(struct radeon_device *rdev)
8319 {
8320 	struct radeon_ring *ring;
8321 	u32 nop;
8322 	int r;
8323 
8324 	/* enable pcie gen2/3 link */
8325 	cik_pcie_gen3_enable(rdev);
8326 	/* enable aspm */
8327 	cik_program_aspm(rdev);
8328 
8329 	/* scratch needs to be initialized before MC */
8330 	r = r600_vram_scratch_init(rdev);
8331 	if (r)
8332 		return r;
8333 
8334 	cik_mc_program(rdev);
8335 
8336 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8337 		r = ci_mc_load_microcode(rdev);
8338 		if (r) {
8339 			DRM_ERROR("Failed to load MC firmware!\n");
8340 			return r;
8341 		}
8342 	}
8343 
8344 	r = cik_pcie_gart_enable(rdev);
8345 	if (r)
8346 		return r;
8347 	cik_gpu_init(rdev);
8348 
8349 	/* allocate rlc buffers */
8350 	if (rdev->flags & RADEON_IS_IGP) {
8351 		if (rdev->family == CHIP_KAVERI) {
8352 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8353 			rdev->rlc.reg_list_size =
8354 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8355 		} else {
8356 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8357 			rdev->rlc.reg_list_size =
8358 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8359 		}
8360 	}
8361 	rdev->rlc.cs_data = ci_cs_data;
8362 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8363 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8364 	r = sumo_rlc_init(rdev);
8365 	if (r) {
8366 		DRM_ERROR("Failed to init rlc BOs!\n");
8367 		return r;
8368 	}
8369 
8370 	/* allocate wb buffer */
8371 	r = radeon_wb_init(rdev);
8372 	if (r)
8373 		return r;
8374 
8375 	/* allocate mec buffers */
8376 	r = cik_mec_init(rdev);
8377 	if (r) {
8378 		DRM_ERROR("Failed to init MEC BOs!\n");
8379 		return r;
8380 	}
8381 
8382 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8383 	if (r) {
8384 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8385 		return r;
8386 	}
8387 
8388 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8389 	if (r) {
8390 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8391 		return r;
8392 	}
8393 
8394 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8395 	if (r) {
8396 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8397 		return r;
8398 	}
8399 
8400 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8401 	if (r) {
8402 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8403 		return r;
8404 	}
8405 
8406 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8407 	if (r) {
8408 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8409 		return r;
8410 	}
8411 
8412 	cik_uvd_start(rdev);
8413 	cik_vce_start(rdev);
8414 
8415 	/* Enable IRQ */
8416 	if (!rdev->irq.installed) {
8417 		r = radeon_irq_kms_init(rdev);
8418 		if (r)
8419 			return r;
8420 	}
8421 
8422 	r = cik_irq_init(rdev);
8423 	if (r) {
8424 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8425 		radeon_irq_kms_fini(rdev);
8426 		return r;
8427 	}
8428 	cik_irq_set(rdev);
8429 
8430 	if (rdev->family == CHIP_HAWAII) {
8431 		if (rdev->new_fw)
8432 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8433 		else
8434 			nop = RADEON_CP_PACKET2;
8435 	} else {
8436 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8437 	}
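
	/*
	 * The older (pre-"new_fw") Hawaii microcode apparently does not
	 * cope with type-3 NOP packets used as ring padding, hence the
	 * type-2 fallback above; everything else pads with the type-3
	 * NOP, whose large count field lets a single packet pad the ring.
	 */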
8438 
8439 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8440 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8441 			     nop);
8442 	if (r)
8443 		return r;
8444 
8445 	/* set up the compute queues */
8446 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8447 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8448 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8449 			     nop);
8450 	if (r)
8451 		return r;
8452 	ring->me = 1; /* first MEC */
8453 	ring->pipe = 0; /* first pipe */
8454 	ring->queue = 0; /* first queue */
8455 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8456 
8457 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8458 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8459 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8460 			     nop);
8461 	if (r)
8462 		return r;
8463 	/* dGPU only have 1 MEC */
8464 	ring->me = 1; /* first MEC */
8465 	ring->pipe = 0; /* first pipe */
8466 	ring->queue = 1; /* second queue */
8467 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8468 
8469 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8470 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8471 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8472 	if (r)
8473 		return r;
8474 
8475 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8476 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8477 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8478 	if (r)
8479 		return r;
8480 
8481 	r = cik_cp_resume(rdev);
8482 	if (r)
8483 		return r;
8484 
8485 	r = cik_sdma_resume(rdev);
8486 	if (r)
8487 		return r;
8488 
8489 	cik_uvd_resume(rdev);
8490 	cik_vce_resume(rdev);
8491 
8492 	r = radeon_ib_pool_init(rdev);
8493 	if (r) {
8494 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8495 		return r;
8496 	}
8497 
8498 	r = radeon_vm_manager_init(rdev);
8499 	if (r) {
8500 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8501 		return r;
8502 	}
8503 
8504 	r = radeon_audio_init(rdev);
8505 	if (r)
8506 		return r;
8507 
8508 	return 0;
8509 }
8510 
8511 /**
8512  * cik_resume - resume the asic to a functional state
8513  *
8514  * @rdev: radeon_device pointer
8515  *
8516  * Programs the asic to a functional state (CIK).
8517  * Called at resume.
8518  * Returns 0 for success, error for failure.
8519  */
8520 int cik_resume(struct radeon_device *rdev)
8521 {
8522 	int r;
8523 
8524 	/* post card */
8525 	atom_asic_init(rdev->mode_info.atom_context);
8526 
8527 	/* init golden registers */
8528 	cik_init_golden_registers(rdev);
8529 
8530 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8531 		radeon_pm_resume(rdev);
8532 
8533 	rdev->accel_working = true;
8534 	r = cik_startup(rdev);
8535 	if (r) {
8536 		DRM_ERROR("cik startup failed on resume\n");
8537 		rdev->accel_working = false;
8538 		return r;
8539 	}
8540 
8541 	return r;
8543 }
8544 
8545 /**
8546  * cik_suspend - suspend the asic
8547  *
8548  * @rdev: radeon_device pointer
8549  *
8550  * Bring the chip into a state suitable for suspend (CIK).
8551  * Called at suspend.
8552  * Returns 0 for success.
8553  */
8554 int cik_suspend(struct radeon_device *rdev)
8555 {
8556 	radeon_pm_suspend(rdev);
8557 	radeon_audio_fini(rdev);
8558 	radeon_vm_manager_fini(rdev);
8559 	cik_cp_enable(rdev, false);
8560 	cik_sdma_enable(rdev, false);
8561 	if (rdev->has_uvd) {
8562 		uvd_v1_0_fini(rdev);
8563 		radeon_uvd_suspend(rdev);
8564 	}
8565 	if (rdev->has_vce)
8566 		radeon_vce_suspend(rdev);
8567 	cik_fini_pg(rdev);
8568 	cik_fini_cg(rdev);
8569 	cik_irq_suspend(rdev);
8570 	radeon_wb_disable(rdev);
8571 	cik_pcie_gart_disable(rdev);
8572 	return 0;
8573 }
8574 
8575 /* The plan is to move initialization into this function and to
8576  * use helper functions so that radeon_device_init() does pretty
8577  * much nothing more than call asic-specific functions. This
8578  * should also allow us to remove a bunch of callback functions
8579  * like vram_info.
8580  */
8581 /**
8582  * cik_init - asic specific driver and hw init
8583  *
8584  * @rdev: radeon_device pointer
8585  *
8586  * Setup asic specific driver variables and program the hw
8587  * to a functional state (CIK).
8588  * Called at driver startup.
8589  * Returns 0 for success, errors for failure.
8590  */
8591 int cik_init(struct radeon_device *rdev)
8592 {
8593 	struct radeon_ring *ring;
8594 	int r;
8595 
8596 	/* Read BIOS */
8597 	if (!radeon_get_bios(rdev)) {
8598 		if (ASIC_IS_AVIVO(rdev))
8599 			return -EINVAL;
8600 	}
8601 	/* Must be an ATOMBIOS */
8602 	if (!rdev->is_atom_bios) {
8603 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8604 		return -EINVAL;
8605 	}
8606 	r = radeon_atombios_init(rdev);
8607 	if (r)
8608 		return r;
8609 
8610 	/* Post card if necessary */
8611 	if (!radeon_card_posted(rdev)) {
8612 		if (!rdev->bios) {
8613 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8614 			return -EINVAL;
8615 		}
8616 		DRM_INFO("GPU not posted. posting now...\n");
8617 		atom_asic_init(rdev->mode_info.atom_context);
8618 	}
8619 	/* init golden registers */
8620 	cik_init_golden_registers(rdev);
8621 	/* Initialize scratch registers */
8622 	cik_scratch_init(rdev);
8623 	/* Initialize surface registers */
8624 	radeon_surface_init(rdev);
8625 	/* Initialize clocks */
8626 	radeon_get_clock_info(rdev->ddev);
8627 
8628 	/* Fence driver */
8629 	r = radeon_fence_driver_init(rdev);
8630 	if (r)
8631 		return r;
8632 
8633 	/* initialize memory controller */
8634 	r = cik_mc_init(rdev);
8635 	if (r)
8636 		return r;
8637 	/* Memory manager */
8638 	r = radeon_bo_init(rdev);
8639 	if (r)
8640 		return r;
8641 
8642 	if (rdev->flags & RADEON_IS_IGP) {
8643 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8644 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8645 			r = cik_init_microcode(rdev);
8646 			if (r) {
8647 				DRM_ERROR("Failed to load firmware!\n");
8648 				return r;
8649 			}
8650 		}
8651 	} else {
8652 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8653 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8654 		    !rdev->mc_fw) {
8655 			r = cik_init_microcode(rdev);
8656 			if (r) {
8657 				DRM_ERROR("Failed to load firmware!\n");
8658 				return r;
8659 			}
8660 		}
8661 	}
8662 
8663 	/* Initialize power management */
8664 	radeon_pm_init(rdev);
8665 
8666 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8667 	ring->ring_obj = NULL;
8668 	r600_ring_init(rdev, ring, 1024 * 1024);
8669 
8670 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8671 	ring->ring_obj = NULL;
8672 	r600_ring_init(rdev, ring, 1024 * 1024);
8673 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8674 	if (r)
8675 		return r;
8676 
8677 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8678 	ring->ring_obj = NULL;
8679 	r600_ring_init(rdev, ring, 1024 * 1024);
8680 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8681 	if (r)
8682 		return r;
8683 
8684 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8685 	ring->ring_obj = NULL;
8686 	r600_ring_init(rdev, ring, 256 * 1024);
8687 
8688 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8689 	ring->ring_obj = NULL;
8690 	r600_ring_init(rdev, ring, 256 * 1024);
8691 
8692 	cik_uvd_init(rdev);
8693 	cik_vce_init(rdev);
8694 
8695 	rdev->ih.ring_obj = NULL;
8696 	r600_ih_ring_init(rdev, 64 * 1024);
8697 
8698 	r = r600_pcie_gart_init(rdev);
8699 	if (r)
8700 		return r;
8701 
8702 	rdev->accel_working = true;
8703 	r = cik_startup(rdev);
8704 	if (r) {
8705 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8706 		cik_cp_fini(rdev);
8707 		cik_sdma_fini(rdev);
8708 		cik_irq_fini(rdev);
8709 		sumo_rlc_fini(rdev);
8710 		cik_mec_fini(rdev);
8711 		radeon_wb_fini(rdev);
8712 		radeon_ib_pool_fini(rdev);
8713 		radeon_vm_manager_fini(rdev);
8714 		radeon_irq_kms_fini(rdev);
8715 		cik_pcie_gart_fini(rdev);
8716 		rdev->accel_working = false;
8717 	}
8718 
8719 	/* Don't start up if the MC ucode is missing.
8720 	 * The default clocks and voltages before the MC ucode
8721 	 * is loaded are not sufficient for advanced operations.
8722 	 */
8723 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8724 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8725 		return -EINVAL;
8726 	}
8727 
8728 	return 0;
8729 }
8730 
8731 /**
8732  * cik_fini - asic specific driver and hw fini
8733  *
8734  * @rdev: radeon_device pointer
8735  *
8736  * Tear down the asic specific driver variables and program the hw
8737  * to an idle state (CIK).
8738  * Called at driver unload.
8739  */
8740 void cik_fini(struct radeon_device *rdev)
8741 {
8742 	radeon_pm_fini(rdev);
8743 	cik_cp_fini(rdev);
8744 	cik_sdma_fini(rdev);
8745 	cik_fini_pg(rdev);
8746 	cik_fini_cg(rdev);
8747 	cik_irq_fini(rdev);
8748 	sumo_rlc_fini(rdev);
8749 	cik_mec_fini(rdev);
8750 	radeon_wb_fini(rdev);
8751 	radeon_vm_manager_fini(rdev);
8752 	radeon_ib_pool_fini(rdev);
8753 	radeon_irq_kms_fini(rdev);
8754 	uvd_v1_0_fini(rdev);
8755 	radeon_uvd_fini(rdev);
8756 	radeon_vce_fini(rdev);
8757 	cik_pcie_gart_fini(rdev);
8758 	r600_vram_scratch_fini(rdev);
8759 	radeon_gem_fini(rdev);
8760 	radeon_fence_driver_fini(rdev);
8761 	radeon_bo_fini(rdev);
8762 	radeon_atombios_fini(rdev);
8763 	kfree(rdev->bios);
8764 	rdev->bios = NULL;
8765 }
8766 
8767 void dce8_program_fmt(struct drm_encoder *encoder)
8768 {
8769 	struct drm_device *dev = encoder->dev;
8770 	struct radeon_device *rdev = dev->dev_private;
8771 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8772 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8773 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8774 	int bpc = 0;
8775 	u32 tmp = 0;
8776 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8777 
8778 	if (connector) {
8779 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8780 		bpc = radeon_get_monitor_bpc(connector);
8781 		dither = radeon_connector->dither;
8782 	}
8783 
8784 	/* LVDS/eDP FMT is set up by atom */
8785 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8786 		return;
8787 
8788 	/* not needed for analog */
8789 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8790 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8791 		return;
8792 
8793 	if (bpc == 0)
8794 		return;
8795 
8796 	switch (bpc) {
8797 	case 6:
8798 		if (dither == RADEON_FMT_DITHER_ENABLE)
8799 			/* XXX sort out optimal dither settings */
8800 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8801 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8802 		else
8803 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8804 		break;
8805 	case 8:
8806 		if (dither == RADEON_FMT_DITHER_ENABLE)
8807 			/* XXX sort out optimal dither settings */
8808 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8809 				FMT_RGB_RANDOM_ENABLE |
8810 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8811 		else
8812 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8813 		break;
8814 	case 10:
8815 		if (dither == RADEON_FMT_DITHER_ENABLE)
8816 			/* XXX sort out optimal dither settings */
8817 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8818 				FMT_RGB_RANDOM_ENABLE |
8819 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8820 		else
8821 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8822 		break;
8823 	default:
8824 		/* not needed */
8825 		break;
8826 	}
8827 
8828 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8829 }
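
/*
 * Example, following the switch above: a 6 bpc panel with dithering
 * enabled gets frame/highpass random plus spatial dithering at depth 0,
 * while with dithering disabled the output is simply truncated to 6
 * bits per component.  Depths 1 and 2 correspond to 8 and 10 bpc; any
 * other bpc leaves tmp at 0, i.e. FMT in bypass.
 */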
8830 
8831 /* display watermark setup */
8832 /**
8833  * dce8_line_buffer_adjust - Set up the line buffer
8834  *
8835  * @rdev: radeon_device pointer
8836  * @radeon_crtc: the selected display controller
8837  * @mode: the current display mode on the selected display
8838  * controller
8839  *
8840  * Set up the line buffer allocation for
8841  * the selected display controller (CIK).
8842  * Returns the line buffer size in pixels.
8843  */
8844 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8845 				   struct radeon_crtc *radeon_crtc,
8846 				   struct drm_display_mode *mode)
8847 {
8848 	u32 tmp, buffer_alloc, i;
8849 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8850 	/*
8851 	 * Line Buffer Setup
8852 	 * There are 6 line buffers, one for each display controller.
8853 	 * There are 3 partitions per LB. Select the number of partitions
8854 	 * to enable based on the display width.  For display widths larger
8855 	 * than 4096, you need to use 2 display controllers and combine
8856 	 * them using the stereo blender.
8857 	 */
8858 	if (radeon_crtc->base.enabled && mode) {
8859 		if (mode->crtc_hdisplay < 1920) {
8860 			tmp = 1;
8861 			buffer_alloc = 2;
8862 		} else if (mode->crtc_hdisplay < 2560) {
8863 			tmp = 2;
8864 			buffer_alloc = 2;
8865 		} else if (mode->crtc_hdisplay < 4096) {
8866 			tmp = 0;
8867 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8868 		} else {
8869 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8870 			tmp = 0;
8871 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8872 		}
8873 	} else {
8874 		tmp = 1;
8875 		buffer_alloc = 0;
8876 	}
8877 
8878 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8879 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8880 
8881 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8882 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8883 	for (i = 0; i < rdev->usec_timeout; i++) {
8884 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8885 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8886 			break;
8887 		udelay(1);
8888 	}
8889 
8890 	if (radeon_crtc->base.enabled && mode) {
8891 		switch (tmp) {
8892 		case 0:
8893 		default:
8894 			return 4096 * 2;
8895 		case 1:
8896 			return 1920 * 2;
8897 		case 2:
8898 			return 2560 * 2;
8899 		}
8900 	}
8901 
8902 	/* controller not enabled, so no lb used */
8903 	return 0;
8904 }
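
/*
 * Example: a 2560x1600 mode falls into the "< 4096" bucket above, so
 * tmp = 0 selects all three LB partitions and the function returns
 * 4096 * 2 pixels of line buffer for the watermark code below.
 */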
8905 
8906 /**
8907  * cik_get_number_of_dram_channels - get the number of dram channels
8908  *
8909  * @rdev: radeon_device pointer
8910  *
8911  * Look up the number of video ram channels (CIK).
8912  * Used for display watermark bandwidth calculations
8913  * Returns the number of dram channels
8914  */
8915 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8916 {
8917 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8918 
8919 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8920 	case 0:
8921 	default:
8922 		return 1;
8923 	case 1:
8924 		return 2;
8925 	case 2:
8926 		return 4;
8927 	case 3:
8928 		return 8;
8929 	case 4:
8930 		return 3;
8931 	case 5:
8932 		return 6;
8933 	case 6:
8934 		return 10;
8935 	case 7:
8936 		return 12;
8937 	case 8:
8938 		return 16;
8939 	}
8940 }
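
/*
 * The returned channel count feeds the dram bandwidth estimates below,
 * which assume each channel is 32 bits wide (hence the "* 4" bytes per
 * channel in dce8_dram_bandwidth()).
 */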
8941 
8942 struct dce8_wm_params {
8943 	u32 dram_channels; /* number of dram channels */
8944 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8945 	u32 sclk;          /* engine clock in kHz */
8946 	u32 disp_clk;      /* display clock in kHz */
8947 	u32 src_width;     /* viewport width */
8948 	u32 active_time;   /* active display time in ns */
8949 	u32 blank_time;    /* blank time in ns */
8950 	bool interlaced;    /* mode is interlaced */
8951 	fixed20_12 vsc;    /* vertical scale ratio */
8952 	u32 num_heads;     /* number of active crtcs */
8953 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8954 	u32 lb_size;       /* line buffer allocated to pipe */
8955 	u32 vtaps;         /* vertical scaler taps */
8956 };
8957 
8958 /**
8959  * dce8_dram_bandwidth - get the dram bandwidth
8960  *
8961  * @wm: watermark calculation data
8962  *
8963  * Calculate the raw dram bandwidth (CIK).
8964  * Used for display watermark bandwidth calculations
8965  * Returns the dram bandwidth in MBytes/s
8966  */
8967 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8968 {
8969 	/* Calculate raw DRAM Bandwidth */
8970 	fixed20_12 dram_efficiency; /* 0.7 */
8971 	fixed20_12 yclk, dram_channels, bandwidth;
8972 	fixed20_12 a;
8973 
8974 	a.full = dfixed_const(1000);
8975 	yclk.full = dfixed_const(wm->yclk);
8976 	yclk.full = dfixed_div(yclk, a);
8977 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8978 	a.full = dfixed_const(10);
8979 	dram_efficiency.full = dfixed_const(7);
8980 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8981 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8982 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8983 
8984 	return dfixed_trunc(bandwidth);
8985 }
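
/*
 * Equivalently: bandwidth[MB/s] = yclk[kHz] / 1000 * dram_channels * 4
 * bytes * 0.7.  With illustrative numbers, 16 channels at an effective
 * 5 GHz per pin give 5000 * 64 * 0.7 = 224000 MB/s.  The _for_display
 * variant below is the same calculation with the worst-case 0.3
 * display allocation in place of the 0.7 efficiency factor.
 */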
8986 
8987 /**
8988  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8989  *
8990  * @wm: watermark calculation data
8991  *
8992  * Calculate the dram bandwidth used for display (CIK).
8993  * Used for display watermark bandwidth calculations
8994  * Returns the dram bandwidth for display in MBytes/s
8995  */
8996 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8997 {
8998 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8999 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9000 	fixed20_12 yclk, dram_channels, bandwidth;
9001 	fixed20_12 a;
9002 
9003 	a.full = dfixed_const(1000);
9004 	yclk.full = dfixed_const(wm->yclk);
9005 	yclk.full = dfixed_div(yclk, a);
9006 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9007 	a.full = dfixed_const(10);
9008 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9009 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9010 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9011 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9012 
9013 	return dfixed_trunc(bandwidth);
9014 }
9015 
9016 /**
9017  * dce8_data_return_bandwidth - get the data return bandwidth
9018  *
9019  * @wm: watermark calculation data
9020  *
9021  * Calculate the data return bandwidth used for display (CIK).
9022  * Used for display watermark bandwidth calculations
9023  * Returns the data return bandwidth in MBytes/s
9024  */
9025 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9026 {
9027 	/* Calculate the display Data return Bandwidth */
9028 	fixed20_12 return_efficiency; /* 0.8 */
9029 	fixed20_12 sclk, bandwidth;
9030 	fixed20_12 a;
9031 
9032 	a.full = dfixed_const(1000);
9033 	sclk.full = dfixed_const(wm->sclk);
9034 	sclk.full = dfixed_div(sclk, a);
9035 	a.full = dfixed_const(10);
9036 	return_efficiency.full = dfixed_const(8);
9037 	return_efficiency.full = dfixed_div(return_efficiency, a);
9038 	a.full = dfixed_const(32);
9039 	bandwidth.full = dfixed_mul(a, sclk);
9040 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9041 
9042 	return dfixed_trunc(bandwidth);
9043 }
9044 
9045 /**
9046  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9047  *
9048  * @wm: watermark calculation data
9049  *
9050  * Calculate the dmif bandwidth used for display (CIK).
9051  * Used for display watermark bandwidth calculations
9052  * Returns the dmif bandwidth in MBytes/s
9053  */
9054 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9055 {
9056 	/* Calculate the DMIF Request Bandwidth */
9057 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9058 	fixed20_12 disp_clk, bandwidth;
9059 	fixed20_12 a, b;
9060 
9061 	a.full = dfixed_const(1000);
9062 	disp_clk.full = dfixed_const(wm->disp_clk);
9063 	disp_clk.full = dfixed_div(disp_clk, a);
9064 	a.full = dfixed_const(32);
9065 	b.full = dfixed_mul(a, disp_clk);
9066 
9067 	a.full = dfixed_const(10);
9068 	disp_clk_request_efficiency.full = dfixed_const(8);
9069 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9070 
9071 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9072 
9073 	return dfixed_trunc(bandwidth);
9074 }
9075 
9076 /**
9077  * dce8_available_bandwidth - get the min available bandwidth
9078  *
9079  * @wm: watermark calculation data
9080  *
9081  * Calculate the min available bandwidth used for display (CIK).
9082  * Used for display watermark bandwidth calculations
9083  * Returns the min available bandwidth in MBytes/s
9084  */
9085 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9086 {
9087 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9088 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9089 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9090 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9091 
9092 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9093 }
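
/*
 * In other words, the display engine can momentarily burst at
 * min(dram * 0.7, sclk * 32 bytes * 0.8, dispclk * 32 bytes * 0.8)
 * MB/s, while only the dram-for-display share is sustainable on
 * average.
 */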
9094 
9095 /**
9096  * dce8_average_bandwidth - get the average available bandwidth
9097  *
9098  * @wm: watermark calculation data
9099  *
9100  * Calculate the average available bandwidth used for display (CIK).
9101  * Used for display watermark bandwidth calculations
9102  * Returns the average available bandwidth in MBytes/s
9103  */
9104 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9105 {
9106 	/* Calculate the display mode Average Bandwidth
9107 	 * DisplayMode should contain the source and destination dimensions,
9108 	 * timing, etc.
9109 	 */
9110 	fixed20_12 bpp;
9111 	fixed20_12 line_time;
9112 	fixed20_12 src_width;
9113 	fixed20_12 bandwidth;
9114 	fixed20_12 a;
9115 
9116 	a.full = dfixed_const(1000);
9117 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9118 	line_time.full = dfixed_div(line_time, a);
9119 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9120 	src_width.full = dfixed_const(wm->src_width);
9121 	bandwidth.full = dfixed_mul(src_width, bpp);
9122 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9123 	bandwidth.full = dfixed_div(bandwidth, line_time);
9124 
9125 	return dfixed_trunc(bandwidth);
9126 }
9127 
9128 /**
9129  * dce8_latency_watermark - get the latency watermark
9130  *
9131  * @wm: watermark calculation data
9132  *
9133  * Calculate the latency watermark (CIK).
9134  * Used for display watermark bandwidth calculations
9135  * Returns the latency watermark in ns
9136  */
9137 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9138 {
9139 	/* First calculate the latency in ns */
9140 	u32 mc_latency = 2000; /* 2000 ns. */
9141 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9142 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9143 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9144 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9145 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9146 		(wm->num_heads * cursor_line_pair_return_time);
9147 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9148 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9149 	u32 tmp, dmif_size = 12288;
9150 	fixed20_12 a, b, c;
9151 
9152 	if (wm->num_heads == 0)
9153 		return 0;
9154 
9155 	a.full = dfixed_const(2);
9156 	b.full = dfixed_const(1);
9157 	if ((wm->vsc.full > a.full) ||
9158 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9159 	    (wm->vtaps >= 5) ||
9160 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9161 		max_src_lines_per_dst_line = 4;
9162 	else
9163 		max_src_lines_per_dst_line = 2;
9164 
9165 	a.full = dfixed_const(available_bandwidth);
9166 	b.full = dfixed_const(wm->num_heads);
9167 	a.full = dfixed_div(a, b);
9168 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9169 	tmp = min(dfixed_trunc(a), tmp);
9170 
9171 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9172 
9173 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9174 	b.full = dfixed_const(1000);
9175 	c.full = dfixed_const(lb_fill_bw);
9176 	b.full = dfixed_div(c, b);
9177 	a.full = dfixed_div(a, b);
9178 	line_fill_time = dfixed_trunc(a);
9179 
9180 	if (line_fill_time < wm->active_time)
9181 		return latency;
9182 	else
9183 		return latency + (line_fill_time - wm->active_time);
9185 }
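
/*
 * Roughly: latency = 2 us of mc latency, plus the time for the other
 * heads' worst-case chunk and cursor line-pair returns, plus a
 * dispclk-dependent dc pipe latency.  If the line buffer cannot be
 * refilled within one active line, the shortfall is added on top, so a
 * slow refill shows up directly as a larger watermark.
 */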
9186 
9187 /**
9188  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9189  * average and available dram bandwidth
9190  *
9191  * @wm: watermark calculation data
9192  *
9193  * Check if the display average bandwidth fits in the display
9194  * dram bandwidth (CIK).
9195  * Used for display watermark bandwidth calculations
9196  * Returns true if the display fits, false if not.
9197  */
9198 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9199 {
9200 	if (dce8_average_bandwidth(wm) <=
9201 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9202 		return true;
9203 	else
9204 		return false;
9205 }
9206 
9207 /**
9208  * dce8_average_bandwidth_vs_available_bandwidth - check
9209  * average and available bandwidth
9210  *
9211  * @wm: watermark calculation data
9212  *
9213  * Check if the display average bandwidth fits in the display
9214  * available bandwidth (CIK).
9215  * Used for display watermark bandwidth calculations
9216  * Returns true if the display fits, false if not.
9217  */
9218 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9219 {
9220 	if (dce8_average_bandwidth(wm) <=
9221 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9222 		return true;
9223 	else
9224 		return false;
9225 }
9226 
9227 /**
9228  * dce8_check_latency_hiding - check latency hiding
9229  *
9230  * @wm: watermark calculation data
9231  *
9232  * Check latency hiding (CIK).
9233  * Used for display watermark bandwidth calculations
9234  * Returns true if the display fits, false if not.
9235  */
9236 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9237 {
9238 	u32 lb_partitions = wm->lb_size / wm->src_width;
9239 	u32 line_time = wm->active_time + wm->blank_time;
9240 	u32 latency_tolerant_lines;
9241 	u32 latency_hiding;
9242 	fixed20_12 a;
9243 
9244 	a.full = dfixed_const(1);
9245 	if (wm->vsc.full > a.full)
9246 		latency_tolerant_lines = 1;
9247 	else {
9248 		if (lb_partitions <= (wm->vtaps + 1))
9249 			latency_tolerant_lines = 1;
9250 		else
9251 			latency_tolerant_lines = 2;
9252 	}
9253 
9254 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9255 
9256 	if (dce8_latency_watermark(wm) <= latency_hiding)
9257 		return true;
9258 	else
9259 		return false;
9260 }
9261 
9262 /**
9263  * dce8_program_watermarks - program display watermarks
9264  *
9265  * @rdev: radeon_device pointer
9266  * @radeon_crtc: the selected display controller
9267  * @lb_size: line buffer size
9268  * @num_heads: number of display controllers in use
9269  *
9270  * Calculate and program the display watermarks for the
9271  * selected display controller (CIK).
9272  */
9273 static void dce8_program_watermarks(struct radeon_device *rdev,
9274 				    struct radeon_crtc *radeon_crtc,
9275 				    u32 lb_size, u32 num_heads)
9276 {
9277 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9278 	struct dce8_wm_params wm_low, wm_high;
9279 	u32 active_time;
9280 	u32 line_time = 0;
9281 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9282 	u32 tmp, wm_mask;
9283 
9284 	if (radeon_crtc->base.enabled && num_heads && mode) {
9285 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9286 					    (u32)mode->clock);
9287 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9288 					  (u32)mode->clock);
9289 		line_time = min(line_time, (u32)65535);
9290 
9291 		/* watermark for high clocks */
9292 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9293 		    rdev->pm.dpm_enabled) {
9294 			wm_high.yclk =
9295 				radeon_dpm_get_mclk(rdev, false) * 10;
9296 			wm_high.sclk =
9297 				radeon_dpm_get_sclk(rdev, false) * 10;
9298 		} else {
9299 			wm_high.yclk = rdev->pm.current_mclk * 10;
9300 			wm_high.sclk = rdev->pm.current_sclk * 10;
9301 		}
9302 
9303 		wm_high.disp_clk = mode->clock;
9304 		wm_high.src_width = mode->crtc_hdisplay;
9305 		wm_high.active_time = active_time;
9306 		wm_high.blank_time = line_time - wm_high.active_time;
9307 		wm_high.interlaced = false;
9308 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9309 			wm_high.interlaced = true;
9310 		wm_high.vsc = radeon_crtc->vsc;
9311 		wm_high.vtaps = 1;
9312 		if (radeon_crtc->rmx_type != RMX_OFF)
9313 			wm_high.vtaps = 2;
9314 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9315 		wm_high.lb_size = lb_size;
9316 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9317 		wm_high.num_heads = num_heads;
9318 
9319 		/* set for high clocks */
9320 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9321 
9322 		/* possibly force display priority to high */
9323 		/* should really do this at mode validation time... */
9324 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9325 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9326 		    !dce8_check_latency_hiding(&wm_high) ||
9327 		    (rdev->disp_priority == 2)) {
9328 			DRM_DEBUG_KMS("force priority to high\n");
9329 		}
9330 
9331 		/* watermark for low clocks */
9332 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9333 		    rdev->pm.dpm_enabled) {
9334 			wm_low.yclk =
9335 				radeon_dpm_get_mclk(rdev, true) * 10;
9336 			wm_low.sclk =
9337 				radeon_dpm_get_sclk(rdev, true) * 10;
9338 		} else {
9339 			wm_low.yclk = rdev->pm.current_mclk * 10;
9340 			wm_low.sclk = rdev->pm.current_sclk * 10;
9341 		}
9342 
9343 		wm_low.disp_clk = mode->clock;
9344 		wm_low.src_width = mode->crtc_hdisplay;
9345 		wm_low.active_time = active_time;
9346 		wm_low.blank_time = line_time - wm_low.active_time;
9347 		wm_low.interlaced = false;
9348 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9349 			wm_low.interlaced = true;
9350 		wm_low.vsc = radeon_crtc->vsc;
9351 		wm_low.vtaps = 1;
9352 		if (radeon_crtc->rmx_type != RMX_OFF)
9353 			wm_low.vtaps = 2;
9354 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9355 		wm_low.lb_size = lb_size;
9356 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9357 		wm_low.num_heads = num_heads;
9358 
9359 		/* set for low clocks */
9360 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9361 
9362 		/* possibly force display priority to high */
9363 		/* should really do this at mode validation time... */
9364 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9365 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9366 		    !dce8_check_latency_hiding(&wm_low) ||
9367 		    (rdev->disp_priority == 2)) {
9368 			DRM_DEBUG_KMS("force priority to high\n");
9369 		}
9370 
9371 		/* Save number of lines the linebuffer leads before the scanout */
9372 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9373 	}
9374 
9375 	/* select wm A */
9376 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9377 	tmp = wm_mask;
9378 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9379 	tmp |= LATENCY_WATERMARK_MASK(1);
9380 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9381 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9382 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9383 		LATENCY_HIGH_WATERMARK(line_time)));
9384 	/* select wm B */
9385 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9386 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9387 	tmp |= LATENCY_WATERMARK_MASK(2);
9388 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9389 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9390 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9391 		LATENCY_HIGH_WATERMARK(line_time)));
9392 	/* restore original selection */
9393 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9394 
9395 	/* save values for DPM */
9396 	radeon_crtc->line_time = line_time;
9397 	radeon_crtc->wm_high = latency_watermark_a;
9398 	radeon_crtc->wm_low = latency_watermark_b;
9399 }
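
/*
 * Watermark A above is programmed for the high (performance) clocks
 * and watermark B for the low (power-saving) ones; the line_time and
 * wm_high/wm_low values saved at the end are what the DPM code uses
 * when it switches between the two sets.
 */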
9400 
9401 /**
9402  * dce8_bandwidth_update - program display watermarks
9403  *
9404  * @rdev: radeon_device pointer
9405  *
9406  * Calculate and program the display watermarks and line
9407  * buffer allocation (CIK).
9408  */
9409 void dce8_bandwidth_update(struct radeon_device *rdev)
9410 {
9411 	struct drm_display_mode *mode = NULL;
9412 	u32 num_heads = 0, lb_size;
9413 	int i;
9414 
9415 	if (!rdev->mode_info.mode_config_initialized)
9416 		return;
9417 
9418 	radeon_update_display_priority(rdev);
9419 
9420 	for (i = 0; i < rdev->num_crtc; i++) {
9421 		if (rdev->mode_info.crtcs[i]->base.enabled)
9422 			num_heads++;
9423 	}
9424 	for (i = 0; i < rdev->num_crtc; i++) {
9425 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9426 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9427 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9428 	}
9429 }
9430 
9431 /**
9432  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9433  *
9434  * @rdev: radeon_device pointer
9435  *
9436  * Fetches a GPU clock counter snapshot (CIK).
9437  * Returns the 64 bit clock counter snapshot.
9438  */
9439 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9440 {
9441 	uint64_t clock;
9442 
9443 	mutex_lock(&rdev->gpu_clock_mutex);
9444 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9445 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9446 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9447 	mutex_unlock(&rdev->gpu_clock_mutex);
9448 	return clock;
9449 }
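
/*
 * The write to RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running
 * 64-bit counter so the following LSB/MSB reads are mutually
 * consistent; gpu_clock_mutex keeps concurrent callers from clobbering
 * each other's snapshot.
 */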
9450 
9451 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9452 			     u32 cntl_reg, u32 status_reg)
9453 {
9454 	int r, i;
9455 	struct atom_clock_dividers dividers;
9456 	uint32_t tmp;
9457 
9458 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9459 					   clock, false, &dividers);
9460 	if (r)
9461 		return r;
9462 
9463 	tmp = RREG32_SMC(cntl_reg);
9464 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9465 	tmp |= dividers.post_divider;
9466 	WREG32_SMC(cntl_reg, tmp);
9467 
9468 	for (i = 0; i < 100; i++) {
9469 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9470 			break;
9471 		mdelay(10);
9472 	}
9473 	if (i == 100)
9474 		return -ETIMEDOUT;
9475 
9476 	return 0;
9477 }
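
/*
 * The status poll above allows up to 100 * 10 ms = 1 second for the
 * new divider to take effect before giving up with -ETIMEDOUT.
 */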
9478 
9479 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9480 {
9481 	int r = 0;
9482 
9483 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9484 	if (r)
9485 		return r;
9486 
9487 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9488 	return r;
9489 }
9490 
9491 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9492 {
9493 	int r, i;
9494 	struct atom_clock_dividers dividers;
9495 	u32 tmp;
9496 
9497 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9498 					   ecclk, false, &dividers);
9499 	if (r)
9500 		return r;
9501 
9502 	for (i = 0; i < 100; i++) {
9503 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9504 			break;
9505 		mdelay(10);
9506 	}
9507 	if (i == 100)
9508 		return -ETIMEDOUT;
9509 
9510 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9511 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9512 	tmp |= dividers.post_divider;
9513 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9514 
9515 	for (i = 0; i < 100; i++) {
9516 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9517 			break;
9518 		mdelay(10);
9519 	}
9520 	if (i == 100)
9521 		return -ETIMEDOUT;
9522 
9523 	return 0;
9524 }
9525 
9526 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9527 {
9528 	struct pci_dev *root = rdev->pdev->bus->self;
9529 	int bridge_pos, gpu_pos;
9530 	u32 speed_cntl, mask, current_data_rate;
9531 	int ret, i;
9532 	u16 tmp16;
9533 
9534 	if (pci_is_root_bus(rdev->pdev->bus))
9535 		return;
9536 
9537 	if (radeon_pcie_gen2 == 0)
9538 		return;
9539 
9540 	if (rdev->flags & RADEON_IS_IGP)
9541 		return;
9542 
9543 	if (!(rdev->flags & RADEON_IS_PCIE))
9544 		return;
9545 
9546 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9547 	if (ret != 0)
9548 		return;
9549 
9550 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9551 		return;
9552 
9553 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9554 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9555 		LC_CURRENT_DATA_RATE_SHIFT;
9556 	if (mask & DRM_PCIE_SPEED_80) {
9557 		if (current_data_rate == 2) {
9558 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9559 			return;
9560 		}
9561 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9562 	} else if (mask & DRM_PCIE_SPEED_50) {
9563 		if (current_data_rate == 1) {
9564 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9565 			return;
9566 		}
9567 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9568 	}
9569 
9570 	bridge_pos = pci_pcie_cap(root);
9571 	if (!bridge_pos)
9572 		return;
9573 
9574 	gpu_pos = pci_pcie_cap(rdev->pdev);
9575 	if (!gpu_pos)
9576 		return;
9577 
9578 	if (mask & DRM_PCIE_SPEED_80) {
9579 		/* re-try equalization if gen3 is not already enabled */
9580 		if (current_data_rate != 2) {
9581 			u16 bridge_cfg, gpu_cfg;
9582 			u16 bridge_cfg2, gpu_cfg2;
9583 			u32 max_lw, current_lw, tmp;
9584 
9585 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9586 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9587 
9588 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9589 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9590 
9591 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9592 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9593 
9594 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9595 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9596 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9597 
9598 			if (current_lw < max_lw) {
9599 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9600 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9601 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9602 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9603 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9604 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9605 				}
9606 			}
9607 
9608 			for (i = 0; i < 10; i++) {
9609 				/* check status */
9610 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9611 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9612 					break;
9613 
9614 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9615 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9616 
9617 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9618 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9619 
9620 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9621 				tmp |= LC_SET_QUIESCE;
9622 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9623 
9624 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9625 				tmp |= LC_REDO_EQ;
9626 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9627 
9628 				mdelay(100);
9629 
9630 				/* linkctl */
9631 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9632 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9633 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9634 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9635 
9636 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9637 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9638 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9639 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9640 
9641 				/* linkctl2 */
9642 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9643 				tmp16 &= ~((1 << 4) | (7 << 9));
9644 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9645 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9646 
9647 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9648 				tmp16 &= ~((1 << 4) | (7 << 9));
9649 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9650 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9651 
9652 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9653 				tmp &= ~LC_SET_QUIESCE;
9654 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9655 			}
9656 		}
9657 	}
9658 
9659 	/* set the link speed */
9660 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9661 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9662 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9663 
9664 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9665 	tmp16 &= ~0xf;
9666 	if (mask & DRM_PCIE_SPEED_80)
9667 		tmp16 |= 3; /* gen3 */
9668 	else if (mask & DRM_PCIE_SPEED_50)
9669 		tmp16 |= 2; /* gen2 */
9670 	else
9671 		tmp16 |= 1; /* gen1 */
9672 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9673 
9674 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9675 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9676 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9677 
9678 	for (i = 0; i < rdev->usec_timeout; i++) {
9679 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9680 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9681 			break;
9682 		udelay(1);
9683 	}
9684 }
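
/*
 * Summary of the retraining dance above: save the bridge and gpu
 * LNKCTL/LNKCTL2 state, widen the link if renegotiation allows it,
 * then up to 10 times quiesce the link and redo equalization while
 * restoring the HAWD and compliance/margin bits each round.  Finally
 * the target rate is programmed into the gpu's LNKCTL2 and
 * LC_INITIATE_LINK_SPEED_CHANGE is kicked, polling until the hardware
 * clears it.
 */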
9685 
9686 static void cik_program_aspm(struct radeon_device *rdev)
9687 {
9688 	u32 data, orig;
9689 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9690 	bool disable_clkreq = false;
9691 
9692 	if (radeon_aspm == 0)
9693 		return;
9694 
9695 	/* XXX double check IGPs */
9696 	if (rdev->flags & RADEON_IS_IGP)
9697 		return;
9698 
9699 	if (!(rdev->flags & RADEON_IS_PCIE))
9700 		return;
9701 
9702 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9703 	data &= ~LC_XMIT_N_FTS_MASK;
9704 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9705 	if (orig != data)
9706 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9707 
9708 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9709 	data |= LC_GO_TO_RECOVERY;
9710 	if (orig != data)
9711 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9712 
9713 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9714 	data |= P_IGNORE_EDB_ERR;
9715 	if (orig != data)
9716 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9717 
9718 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9719 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9720 	data |= LC_PMI_TO_L1_DIS;
9721 	if (!disable_l0s)
9722 		data |= LC_L0S_INACTIVITY(7);
9723 
9724 	if (!disable_l1) {
9725 		data |= LC_L1_INACTIVITY(7);
9726 		data &= ~LC_PMI_TO_L1_DIS;
9727 		if (orig != data)
9728 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9729 
9730 		if (!disable_plloff_in_l1) {
9731 			bool clk_req_support;
9732 
9733 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9734 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9735 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9736 			if (orig != data)
9737 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9738 
9739 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9740 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9741 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9742 			if (orig != data)
9743 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9744 
9745 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9746 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9747 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9748 			if (orig != data)
9749 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9750 
9751 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9752 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9753 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9754 			if (orig != data)
9755 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9756 
9757 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9758 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9759 			data |= LC_DYN_LANES_PWR_STATE(3);
9760 			if (orig != data)
9761 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9762 
9763 			if (!disable_clkreq &&
9764 			    !pci_is_root_bus(rdev->pdev->bus)) {
9765 				struct pci_dev *root = rdev->pdev->bus->self;
9766 				u32 lnkcap;
9767 
9768 				clk_req_support = false;
9769 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9770 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9771 					clk_req_support = true;
9772 			} else {
9773 				clk_req_support = false;
9774 			}
9775 
9776 			if (clk_req_support) {
9777 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9778 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9779 				if (orig != data)
9780 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9781 
9782 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9783 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9784 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9785 				if (orig != data)
9786 					WREG32_SMC(THM_CLK_CNTL, data);
9787 
9788 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9789 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9790 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9791 				if (orig != data)
9792 					WREG32_SMC(MISC_CLK_CTRL, data);
9793 
9794 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9795 				data &= ~BCLK_AS_XCLK;
9796 				if (orig != data)
9797 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9798 
9799 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9800 				data &= ~FORCE_BIF_REFCLK_EN;
9801 				if (orig != data)
9802 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9803 
9804 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9805 				data &= ~MPLL_CLKOUT_SEL_MASK;
9806 				data |= MPLL_CLKOUT_SEL(4);
9807 				if (orig != data)
9808 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9809 			}
9810 		}
9811 	} else {
9812 		if (orig != data)
9813 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9814 	}
9815 
9816 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9817 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9818 	if (orig != data)
9819 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9820 
9821 	if (!disable_l0s) {
9822 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9823 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9824 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9825 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9826 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9827 				data &= ~LC_L0S_INACTIVITY_MASK;
9828 				if (orig != data)
9829 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9830 			}
9831 		}
9832 	}
9833 }
9834