xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision 7aacf86b)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37 
/*
 * Expands to ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED).
 * NOTE(review): presumably the default SH_MEM_CONFIG value programmed for
 * gfx; the users of this macro are not visible in this part of the file.
 */
38 #define SH_MEM_CONFIG_GFX_DEFAULT \
39 	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
40 
/*
 * Firmware files declared for this module via MODULE_FIRMWARE().
 *
 * Each chip name (BONAIRE, HAWAII, KAVERI, KABINI, MULLINS) is listed twice:
 * once with upper-case file names and once with lower-case file names.
 * The two sets are not identical: the upper-case BONAIRE/HAWAII sets carry an
 * extra *_mc2.bin, the lower-case bonaire/hawaii sets carry an extra
 * *_k_smc.bin, and the lower-case kaveri set carries an extra *_mec2.bin.
 * NOTE(review): the two spellings presumably correspond to two firmware
 * naming generations - confirm against the firmware loading code.
 */
41 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
50 
51 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
52 MODULE_FIRMWARE("radeon/bonaire_me.bin");
53 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
54 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
55 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
56 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
57 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
58 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
59 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
60 
61 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
69 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
70 
71 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
72 MODULE_FIRMWARE("radeon/hawaii_me.bin");
73 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
74 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
75 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
76 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
77 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
78 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
79 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
80 
81 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
82 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
83 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
84 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
86 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
87 
88 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
89 MODULE_FIRMWARE("radeon/kaveri_me.bin");
90 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
91 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
92 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
93 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
94 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
95 
96 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
97 MODULE_FIRMWARE("radeon/KABINI_me.bin");
98 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
99 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
100 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
101 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
102 
103 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
104 MODULE_FIRMWARE("radeon/kabini_me.bin");
105 MODULE_FIRMWARE("radeon/kabini_ce.bin");
106 MODULE_FIRMWARE("radeon/kabini_mec.bin");
107 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
108 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
109 
110 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
111 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
112 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
113 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
115 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
116 
117 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
118 MODULE_FIRMWARE("radeon/mullins_me.bin");
119 MODULE_FIRMWARE("radeon/mullins_ce.bin");
120 MODULE_FIRMWARE("radeon/mullins_mec.bin");
121 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
122 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
123 
/*
 * Prototypes needed before first use in this file.
 *
 * The 'extern' entries are helpers with external linkage whose definitions
 * are not in the visible part of this file (the r600_, evergreen_, sumo_,
 * si_, cik_sdma_ and vce_v2_0_ prefixes suggest other radeon source files -
 * TODO confirm).  The 'static' entries are forward declarations of
 * file-local functions, so their definitions must live in this file.
 */
124 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
125 extern void r600_ih_ring_fini(struct radeon_device *rdev);
126 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
127 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
128 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
129 extern void sumo_rlc_fini(struct radeon_device *rdev);
130 extern int sumo_rlc_init(struct radeon_device *rdev);
131 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
132 extern void si_rlc_reset(struct radeon_device *rdev);
133 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
134 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
135 extern int cik_sdma_resume(struct radeon_device *rdev);
136 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
137 extern void cik_sdma_fini(struct radeon_device *rdev);
138 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
139 static void cik_rlc_stop(struct radeon_device *rdev);
140 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
141 static void cik_program_aspm(struct radeon_device *rdev);
142 static void cik_init_pg(struct radeon_device *rdev);
143 static void cik_init_cg(struct radeon_device *rdev);
144 static void cik_fini_pg(struct radeon_device *rdev);
145 static void cik_fini_cg(struct radeon_device *rdev);
146 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
147 					  bool enable);
148 
149 /**
150  * cik_get_allowed_info_register - fetch the register for the info ioctl
151  *
152  * @rdev: radeon_device pointer
153  * @reg: register offset in bytes
154  * @val: register value
155  *
156  * Returns 0 for success or -EINVAL for an invalid register
157  *
158  */
159 int cik_get_allowed_info_register(struct radeon_device *rdev,
160 				  u32 reg, u32 *val)
161 {
162 	switch (reg) {
163 	case GRBM_STATUS:
164 	case GRBM_STATUS2:
165 	case GRBM_STATUS_SE0:
166 	case GRBM_STATUS_SE1:
167 	case GRBM_STATUS_SE2:
168 	case GRBM_STATUS_SE3:
169 	case SRBM_STATUS:
170 	case SRBM_STATUS2:
171 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
172 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
173 	case UVD_STATUS:
174 	/* TODO VCE */
175 		*val = RREG32(reg);
176 		return 0;
177 	default:
178 		return -EINVAL;
179 	}
180 }
181 
182 /*
183  * Indirect registers accessor
184  */
185 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
186 {
187 	unsigned long flags;
188 	u32 r;
189 
190 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
191 	WREG32(CIK_DIDT_IND_INDEX, (reg));
192 	r = RREG32(CIK_DIDT_IND_DATA);
193 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
194 	return r;
195 }
196 
197 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
198 {
199 	unsigned long flags;
200 
201 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
202 	WREG32(CIK_DIDT_IND_INDEX, (reg));
203 	WREG32(CIK_DIDT_IND_DATA, (v));
204 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
205 }
206 
207 /* get temperature in millidegrees */
208 int ci_get_temp(struct radeon_device *rdev)
209 {
210 	u32 temp;
211 	int actual_temp = 0;
212 
213 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
214 		CTF_TEMP_SHIFT;
215 
216 	if (temp & 0x200)
217 		actual_temp = 255;
218 	else
219 		actual_temp = temp & 0x1ff;
220 
221 	actual_temp = actual_temp * 1000;
222 
223 	return actual_temp;
224 }
225 
226 /* get temperature in millidegrees */
227 int kv_get_temp(struct radeon_device *rdev)
228 {
229 	u32 temp;
230 	int actual_temp = 0;
231 
232 	temp = RREG32_SMC(0xC0300E0C);
233 
234 	if (temp)
235 		actual_temp = (temp / 8) - 49;
236 	else
237 		actual_temp = 0;
238 
239 	actual_temp = actual_temp * 1000;
240 
241 	return actual_temp;
242 }
243 
244 /*
245  * Indirect registers accessor
246  */
247 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
248 {
249 	unsigned long flags;
250 	u32 r;
251 
252 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
253 	WREG32(PCIE_INDEX, reg);
254 	(void)RREG32(PCIE_INDEX);
255 	r = RREG32(PCIE_DATA);
256 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
257 	return r;
258 }
259 
260 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
261 {
262 	unsigned long flags;
263 
264 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
265 	WREG32(PCIE_INDEX, reg);
266 	(void)RREG32(PCIE_INDEX);
267 	WREG32(PCIE_DATA, v);
268 	(void)RREG32(PCIE_DATA);
269 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
270 }
271 
/*
 * spectre_rlc_save_restore_register_list - constant table of u32 words.
 *
 * NOTE(review): going by the name this is the RLC save/restore register
 * list for "Spectre" parts; the code that consumes it is not visible in
 * this part of the file, so confirm the format against that consumer.
 *
 * Layout as written below:
 *  - most entries are a pair of words: a descriptor built as
 *    (16-bit value << 16) | (what appears to be a register byte offset >> 2),
 *    followed by a 0x00000000 word;
 *  - a bare 0x3 word and, later, a bare 0x5 word sit between runs of pairs;
 *  - the five descriptors after 0x5 are NOT followed by a zero word.
 *
 * The meaning of the upper 16-bit values (0x0e00, 0x4e00..0xbe00, 0x0600,
 * 0x0400, 0x0000, 0x0001) and of the 0x3 / 0x5 words is not established here.
 */
272 static const u32 spectre_rlc_save_restore_register_list[] =
273 {
274 	(0x0e00 << 16) | (0xc12c >> 2),
275 	0x00000000,
276 	(0x0e00 << 16) | (0xc140 >> 2),
277 	0x00000000,
278 	(0x0e00 << 16) | (0xc150 >> 2),
279 	0x00000000,
280 	(0x0e00 << 16) | (0xc15c >> 2),
281 	0x00000000,
282 	(0x0e00 << 16) | (0xc168 >> 2),
283 	0x00000000,
284 	(0x0e00 << 16) | (0xc170 >> 2),
285 	0x00000000,
286 	(0x0e00 << 16) | (0xc178 >> 2),
287 	0x00000000,
288 	(0x0e00 << 16) | (0xc204 >> 2),
289 	0x00000000,
290 	(0x0e00 << 16) | (0xc2b4 >> 2),
291 	0x00000000,
292 	(0x0e00 << 16) | (0xc2b8 >> 2),
293 	0x00000000,
294 	(0x0e00 << 16) | (0xc2bc >> 2),
295 	0x00000000,
296 	(0x0e00 << 16) | (0xc2c0 >> 2),
297 	0x00000000,
298 	(0x0e00 << 16) | (0x8228 >> 2),
299 	0x00000000,
300 	(0x0e00 << 16) | (0x829c >> 2),
301 	0x00000000,
302 	(0x0e00 << 16) | (0x869c >> 2),
303 	0x00000000,
304 	(0x0600 << 16) | (0x98f4 >> 2),
305 	0x00000000,
306 	(0x0e00 << 16) | (0x98f8 >> 2),
307 	0x00000000,
308 	(0x0e00 << 16) | (0x9900 >> 2),
309 	0x00000000,
310 	(0x0e00 << 16) | (0xc260 >> 2),
311 	0x00000000,
312 	(0x0e00 << 16) | (0x90e8 >> 2),
313 	0x00000000,
314 	(0x0e00 << 16) | (0x3c000 >> 2),
315 	0x00000000,
316 	(0x0e00 << 16) | (0x3c00c >> 2),
317 	0x00000000,
318 	(0x0e00 << 16) | (0x8c1c >> 2),
319 	0x00000000,
320 	(0x0e00 << 16) | (0x9700 >> 2),
321 	0x00000000,
322 	(0x0e00 << 16) | (0xcd20 >> 2),
323 	0x00000000,
324 	(0x4e00 << 16) | (0xcd20 >> 2),
325 	0x00000000,
326 	(0x5e00 << 16) | (0xcd20 >> 2),
327 	0x00000000,
328 	(0x6e00 << 16) | (0xcd20 >> 2),
329 	0x00000000,
330 	(0x7e00 << 16) | (0xcd20 >> 2),
331 	0x00000000,
332 	(0x8e00 << 16) | (0xcd20 >> 2),
333 	0x00000000,
334 	(0x9e00 << 16) | (0xcd20 >> 2),
335 	0x00000000,
336 	(0xae00 << 16) | (0xcd20 >> 2),
337 	0x00000000,
338 	(0xbe00 << 16) | (0xcd20 >> 2),
339 	0x00000000,
340 	(0x0e00 << 16) | (0x89bc >> 2),
341 	0x00000000,
342 	(0x0e00 << 16) | (0x8900 >> 2),
343 	0x00000000,
344 	0x3,
345 	(0x0e00 << 16) | (0xc130 >> 2),
346 	0x00000000,
347 	(0x0e00 << 16) | (0xc134 >> 2),
348 	0x00000000,
349 	(0x0e00 << 16) | (0xc1fc >> 2),
350 	0x00000000,
351 	(0x0e00 << 16) | (0xc208 >> 2),
352 	0x00000000,
353 	(0x0e00 << 16) | (0xc264 >> 2),
354 	0x00000000,
355 	(0x0e00 << 16) | (0xc268 >> 2),
356 	0x00000000,
357 	(0x0e00 << 16) | (0xc26c >> 2),
358 	0x00000000,
359 	(0x0e00 << 16) | (0xc270 >> 2),
360 	0x00000000,
361 	(0x0e00 << 16) | (0xc274 >> 2),
362 	0x00000000,
363 	(0x0e00 << 16) | (0xc278 >> 2),
364 	0x00000000,
365 	(0x0e00 << 16) | (0xc27c >> 2),
366 	0x00000000,
367 	(0x0e00 << 16) | (0xc280 >> 2),
368 	0x00000000,
369 	(0x0e00 << 16) | (0xc284 >> 2),
370 	0x00000000,
371 	(0x0e00 << 16) | (0xc288 >> 2),
372 	0x00000000,
373 	(0x0e00 << 16) | (0xc28c >> 2),
374 	0x00000000,
375 	(0x0e00 << 16) | (0xc290 >> 2),
376 	0x00000000,
377 	(0x0e00 << 16) | (0xc294 >> 2),
378 	0x00000000,
379 	(0x0e00 << 16) | (0xc298 >> 2),
380 	0x00000000,
381 	(0x0e00 << 16) | (0xc29c >> 2),
382 	0x00000000,
383 	(0x0e00 << 16) | (0xc2a0 >> 2),
384 	0x00000000,
385 	(0x0e00 << 16) | (0xc2a4 >> 2),
386 	0x00000000,
387 	(0x0e00 << 16) | (0xc2a8 >> 2),
388 	0x00000000,
389 	(0x0e00 << 16) | (0xc2ac  >> 2),
390 	0x00000000,
391 	(0x0e00 << 16) | (0xc2b0 >> 2),
392 	0x00000000,
393 	(0x0e00 << 16) | (0x301d0 >> 2),
394 	0x00000000,
395 	(0x0e00 << 16) | (0x30238 >> 2),
396 	0x00000000,
397 	(0x0e00 << 16) | (0x30250 >> 2),
398 	0x00000000,
399 	(0x0e00 << 16) | (0x30254 >> 2),
400 	0x00000000,
401 	(0x0e00 << 16) | (0x30258 >> 2),
402 	0x00000000,
403 	(0x0e00 << 16) | (0x3025c >> 2),
404 	0x00000000,
405 	(0x4e00 << 16) | (0xc900 >> 2),
406 	0x00000000,
407 	(0x5e00 << 16) | (0xc900 >> 2),
408 	0x00000000,
409 	(0x6e00 << 16) | (0xc900 >> 2),
410 	0x00000000,
411 	(0x7e00 << 16) | (0xc900 >> 2),
412 	0x00000000,
413 	(0x8e00 << 16) | (0xc900 >> 2),
414 	0x00000000,
415 	(0x9e00 << 16) | (0xc900 >> 2),
416 	0x00000000,
417 	(0xae00 << 16) | (0xc900 >> 2),
418 	0x00000000,
419 	(0xbe00 << 16) | (0xc900 >> 2),
420 	0x00000000,
421 	(0x4e00 << 16) | (0xc904 >> 2),
422 	0x00000000,
423 	(0x5e00 << 16) | (0xc904 >> 2),
424 	0x00000000,
425 	(0x6e00 << 16) | (0xc904 >> 2),
426 	0x00000000,
427 	(0x7e00 << 16) | (0xc904 >> 2),
428 	0x00000000,
429 	(0x8e00 << 16) | (0xc904 >> 2),
430 	0x00000000,
431 	(0x9e00 << 16) | (0xc904 >> 2),
432 	0x00000000,
433 	(0xae00 << 16) | (0xc904 >> 2),
434 	0x00000000,
435 	(0xbe00 << 16) | (0xc904 >> 2),
436 	0x00000000,
437 	(0x4e00 << 16) | (0xc908 >> 2),
438 	0x00000000,
439 	(0x5e00 << 16) | (0xc908 >> 2),
440 	0x00000000,
441 	(0x6e00 << 16) | (0xc908 >> 2),
442 	0x00000000,
443 	(0x7e00 << 16) | (0xc908 >> 2),
444 	0x00000000,
445 	(0x8e00 << 16) | (0xc908 >> 2),
446 	0x00000000,
447 	(0x9e00 << 16) | (0xc908 >> 2),
448 	0x00000000,
449 	(0xae00 << 16) | (0xc908 >> 2),
450 	0x00000000,
451 	(0xbe00 << 16) | (0xc908 >> 2),
452 	0x00000000,
453 	(0x4e00 << 16) | (0xc90c >> 2),
454 	0x00000000,
455 	(0x5e00 << 16) | (0xc90c >> 2),
456 	0x00000000,
457 	(0x6e00 << 16) | (0xc90c >> 2),
458 	0x00000000,
459 	(0x7e00 << 16) | (0xc90c >> 2),
460 	0x00000000,
461 	(0x8e00 << 16) | (0xc90c >> 2),
462 	0x00000000,
463 	(0x9e00 << 16) | (0xc90c >> 2),
464 	0x00000000,
465 	(0xae00 << 16) | (0xc90c >> 2),
466 	0x00000000,
467 	(0xbe00 << 16) | (0xc90c >> 2),
468 	0x00000000,
469 	(0x4e00 << 16) | (0xc910 >> 2),
470 	0x00000000,
471 	(0x5e00 << 16) | (0xc910 >> 2),
472 	0x00000000,
473 	(0x6e00 << 16) | (0xc910 >> 2),
474 	0x00000000,
475 	(0x7e00 << 16) | (0xc910 >> 2),
476 	0x00000000,
477 	(0x8e00 << 16) | (0xc910 >> 2),
478 	0x00000000,
479 	(0x9e00 << 16) | (0xc910 >> 2),
480 	0x00000000,
481 	(0xae00 << 16) | (0xc910 >> 2),
482 	0x00000000,
483 	(0xbe00 << 16) | (0xc910 >> 2),
484 	0x00000000,
485 	(0x0e00 << 16) | (0xc99c >> 2),
486 	0x00000000,
487 	(0x0e00 << 16) | (0x9834 >> 2),
488 	0x00000000,
489 	(0x0000 << 16) | (0x30f00 >> 2),
490 	0x00000000,
491 	(0x0001 << 16) | (0x30f00 >> 2),
492 	0x00000000,
493 	(0x0000 << 16) | (0x30f04 >> 2),
494 	0x00000000,
495 	(0x0001 << 16) | (0x30f04 >> 2),
496 	0x00000000,
497 	(0x0000 << 16) | (0x30f08 >> 2),
498 	0x00000000,
499 	(0x0001 << 16) | (0x30f08 >> 2),
500 	0x00000000,
501 	(0x0000 << 16) | (0x30f0c >> 2),
502 	0x00000000,
503 	(0x0001 << 16) | (0x30f0c >> 2),
504 	0x00000000,
505 	(0x0600 << 16) | (0x9b7c >> 2),
506 	0x00000000,
507 	(0x0e00 << 16) | (0x8a14 >> 2),
508 	0x00000000,
509 	(0x0e00 << 16) | (0x8a18 >> 2),
510 	0x00000000,
511 	(0x0600 << 16) | (0x30a00 >> 2),
512 	0x00000000,
513 	(0x0e00 << 16) | (0x8bf0 >> 2),
514 	0x00000000,
515 	(0x0e00 << 16) | (0x8bcc >> 2),
516 	0x00000000,
517 	(0x0e00 << 16) | (0x8b24 >> 2),
518 	0x00000000,
519 	(0x0e00 << 16) | (0x30a04 >> 2),
520 	0x00000000,
521 	(0x0600 << 16) | (0x30a10 >> 2),
522 	0x00000000,
523 	(0x0600 << 16) | (0x30a14 >> 2),
524 	0x00000000,
525 	(0x0600 << 16) | (0x30a18 >> 2),
526 	0x00000000,
527 	(0x0600 << 16) | (0x30a2c >> 2),
528 	0x00000000,
529 	(0x0e00 << 16) | (0xc700 >> 2),
530 	0x00000000,
531 	(0x0e00 << 16) | (0xc704 >> 2),
532 	0x00000000,
533 	(0x0e00 << 16) | (0xc708 >> 2),
534 	0x00000000,
535 	(0x0e00 << 16) | (0xc768 >> 2),
536 	0x00000000,
537 	(0x0400 << 16) | (0xc770 >> 2),
538 	0x00000000,
539 	(0x0400 << 16) | (0xc774 >> 2),
540 	0x00000000,
541 	(0x0400 << 16) | (0xc778 >> 2),
542 	0x00000000,
543 	(0x0400 << 16) | (0xc77c >> 2),
544 	0x00000000,
545 	(0x0400 << 16) | (0xc780 >> 2),
546 	0x00000000,
547 	(0x0400 << 16) | (0xc784 >> 2),
548 	0x00000000,
549 	(0x0400 << 16) | (0xc788 >> 2),
550 	0x00000000,
551 	(0x0400 << 16) | (0xc78c >> 2),
552 	0x00000000,
553 	(0x0400 << 16) | (0xc798 >> 2),
554 	0x00000000,
555 	(0x0400 << 16) | (0xc79c >> 2),
556 	0x00000000,
557 	(0x0400 << 16) | (0xc7a0 >> 2),
558 	0x00000000,
559 	(0x0400 << 16) | (0xc7a4 >> 2),
560 	0x00000000,
561 	(0x0400 << 16) | (0xc7a8 >> 2),
562 	0x00000000,
563 	(0x0400 << 16) | (0xc7ac >> 2),
564 	0x00000000,
565 	(0x0400 << 16) | (0xc7b0 >> 2),
566 	0x00000000,
567 	(0x0400 << 16) | (0xc7b4 >> 2),
568 	0x00000000,
569 	(0x0e00 << 16) | (0x9100 >> 2),
570 	0x00000000,
571 	(0x0e00 << 16) | (0x3c010 >> 2),
572 	0x00000000,
573 	(0x0e00 << 16) | (0x92a8 >> 2),
574 	0x00000000,
575 	(0x0e00 << 16) | (0x92ac >> 2),
576 	0x00000000,
577 	(0x0e00 << 16) | (0x92b4 >> 2),
578 	0x00000000,
579 	(0x0e00 << 16) | (0x92b8 >> 2),
580 	0x00000000,
581 	(0x0e00 << 16) | (0x92bc >> 2),
582 	0x00000000,
583 	(0x0e00 << 16) | (0x92c0 >> 2),
584 	0x00000000,
585 	(0x0e00 << 16) | (0x92c4 >> 2),
586 	0x00000000,
587 	(0x0e00 << 16) | (0x92c8 >> 2),
588 	0x00000000,
589 	(0x0e00 << 16) | (0x92cc >> 2),
590 	0x00000000,
591 	(0x0e00 << 16) | (0x92d0 >> 2),
592 	0x00000000,
593 	(0x0e00 << 16) | (0x8c00 >> 2),
594 	0x00000000,
595 	(0x0e00 << 16) | (0x8c04 >> 2),
596 	0x00000000,
597 	(0x0e00 << 16) | (0x8c20 >> 2),
598 	0x00000000,
599 	(0x0e00 << 16) | (0x8c38 >> 2),
600 	0x00000000,
601 	(0x0e00 << 16) | (0x8c3c >> 2),
602 	0x00000000,
603 	(0x0e00 << 16) | (0xae00 >> 2),
604 	0x00000000,
605 	(0x0e00 << 16) | (0x9604 >> 2),
606 	0x00000000,
607 	(0x0e00 << 16) | (0xac08 >> 2),
608 	0x00000000,
609 	(0x0e00 << 16) | (0xac0c >> 2),
610 	0x00000000,
611 	(0x0e00 << 16) | (0xac10 >> 2),
612 	0x00000000,
613 	(0x0e00 << 16) | (0xac14 >> 2),
614 	0x00000000,
615 	(0x0e00 << 16) | (0xac58 >> 2),
616 	0x00000000,
617 	(0x0e00 << 16) | (0xac68 >> 2),
618 	0x00000000,
619 	(0x0e00 << 16) | (0xac6c >> 2),
620 	0x00000000,
621 	(0x0e00 << 16) | (0xac70 >> 2),
622 	0x00000000,
623 	(0x0e00 << 16) | (0xac74 >> 2),
624 	0x00000000,
625 	(0x0e00 << 16) | (0xac78 >> 2),
626 	0x00000000,
627 	(0x0e00 << 16) | (0xac7c >> 2),
628 	0x00000000,
629 	(0x0e00 << 16) | (0xac80 >> 2),
630 	0x00000000,
631 	(0x0e00 << 16) | (0xac84 >> 2),
632 	0x00000000,
633 	(0x0e00 << 16) | (0xac88 >> 2),
634 	0x00000000,
635 	(0x0e00 << 16) | (0xac8c >> 2),
636 	0x00000000,
637 	(0x0e00 << 16) | (0x970c >> 2),
638 	0x00000000,
639 	(0x0e00 << 16) | (0x9714 >> 2),
640 	0x00000000,
641 	(0x0e00 << 16) | (0x9718 >> 2),
642 	0x00000000,
643 	(0x0e00 << 16) | (0x971c >> 2),
644 	0x00000000,
645 	(0x0e00 << 16) | (0x31068 >> 2),
646 	0x00000000,
647 	(0x4e00 << 16) | (0x31068 >> 2),
648 	0x00000000,
649 	(0x5e00 << 16) | (0x31068 >> 2),
650 	0x00000000,
651 	(0x6e00 << 16) | (0x31068 >> 2),
652 	0x00000000,
653 	(0x7e00 << 16) | (0x31068 >> 2),
654 	0x00000000,
655 	(0x8e00 << 16) | (0x31068 >> 2),
656 	0x00000000,
657 	(0x9e00 << 16) | (0x31068 >> 2),
658 	0x00000000,
659 	(0xae00 << 16) | (0x31068 >> 2),
660 	0x00000000,
661 	(0xbe00 << 16) | (0x31068 >> 2),
662 	0x00000000,
663 	(0x0e00 << 16) | (0xcd10 >> 2),
664 	0x00000000,
665 	(0x0e00 << 16) | (0xcd14 >> 2),
666 	0x00000000,
667 	(0x0e00 << 16) | (0x88b0 >> 2),
668 	0x00000000,
669 	(0x0e00 << 16) | (0x88b4 >> 2),
670 	0x00000000,
671 	(0x0e00 << 16) | (0x88b8 >> 2),
672 	0x00000000,
673 	(0x0e00 << 16) | (0x88bc >> 2),
674 	0x00000000,
675 	(0x0400 << 16) | (0x89c0 >> 2),
676 	0x00000000,
677 	(0x0e00 << 16) | (0x88c4 >> 2),
678 	0x00000000,
679 	(0x0e00 << 16) | (0x88c8 >> 2),
680 	0x00000000,
681 	(0x0e00 << 16) | (0x88d0 >> 2),
682 	0x00000000,
683 	(0x0e00 << 16) | (0x88d4 >> 2),
684 	0x00000000,
685 	(0x0e00 << 16) | (0x88d8 >> 2),
686 	0x00000000,
687 	(0x0e00 << 16) | (0x8980 >> 2),
688 	0x00000000,
689 	(0x0e00 << 16) | (0x30938 >> 2),
690 	0x00000000,
691 	(0x0e00 << 16) | (0x3093c >> 2),
692 	0x00000000,
693 	(0x0e00 << 16) | (0x30940 >> 2),
694 	0x00000000,
695 	(0x0e00 << 16) | (0x89a0 >> 2),
696 	0x00000000,
697 	(0x0e00 << 16) | (0x30900 >> 2),
698 	0x00000000,
699 	(0x0e00 << 16) | (0x30904 >> 2),
700 	0x00000000,
701 	(0x0e00 << 16) | (0x89b4 >> 2),
702 	0x00000000,
703 	(0x0e00 << 16) | (0x3c210 >> 2),
704 	0x00000000,
705 	(0x0e00 << 16) | (0x3c214 >> 2),
706 	0x00000000,
707 	(0x0e00 << 16) | (0x3c218 >> 2),
708 	0x00000000,
709 	(0x0e00 << 16) | (0x8904 >> 2),
710 	0x00000000,
711 	0x5,
712 	(0x0e00 << 16) | (0x8c28 >> 2),
713 	(0x0e00 << 16) | (0x8c2c >> 2),
714 	(0x0e00 << 16) | (0x8c30 >> 2),
715 	(0x0e00 << 16) | (0x8c34 >> 2),
716 	(0x0e00 << 16) | (0x9600 >> 2),
717 };
718 
/*
 * kalindi_rlc_save_restore_register_list - constant table of u32 words.
 *
 * NOTE(review): going by the name this is the RLC save/restore register
 * list for "Kalindi" parts; the code that consumes it is not visible in
 * this part of the file, so confirm the format against that consumer.
 *
 * Layout as written below:
 *  - most entries are a pair of words: a descriptor built as
 *    (16-bit value << 16) | (what appears to be a register byte offset >> 2),
 *    followed by a 0x00000000 word;
 *  - a bare 0x3 word and, later, a bare 0x5 word sit between runs of pairs;
 *  - the five descriptors after 0x5 are NOT followed by a zero word.
 *
 * The meaning of the upper 16-bit values (0x0e00, 0x4e00..0x7e00, 0x0600,
 * 0x0400, 0x0000) and of the 0x3 / 0x5 words is not established here.
 */
719 static const u32 kalindi_rlc_save_restore_register_list[] =
720 {
721 	(0x0e00 << 16) | (0xc12c >> 2),
722 	0x00000000,
723 	(0x0e00 << 16) | (0xc140 >> 2),
724 	0x00000000,
725 	(0x0e00 << 16) | (0xc150 >> 2),
726 	0x00000000,
727 	(0x0e00 << 16) | (0xc15c >> 2),
728 	0x00000000,
729 	(0x0e00 << 16) | (0xc168 >> 2),
730 	0x00000000,
731 	(0x0e00 << 16) | (0xc170 >> 2),
732 	0x00000000,
733 	(0x0e00 << 16) | (0xc204 >> 2),
734 	0x00000000,
735 	(0x0e00 << 16) | (0xc2b4 >> 2),
736 	0x00000000,
737 	(0x0e00 << 16) | (0xc2b8 >> 2),
738 	0x00000000,
739 	(0x0e00 << 16) | (0xc2bc >> 2),
740 	0x00000000,
741 	(0x0e00 << 16) | (0xc2c0 >> 2),
742 	0x00000000,
743 	(0x0e00 << 16) | (0x8228 >> 2),
744 	0x00000000,
745 	(0x0e00 << 16) | (0x829c >> 2),
746 	0x00000000,
747 	(0x0e00 << 16) | (0x869c >> 2),
748 	0x00000000,
749 	(0x0600 << 16) | (0x98f4 >> 2),
750 	0x00000000,
751 	(0x0e00 << 16) | (0x98f8 >> 2),
752 	0x00000000,
753 	(0x0e00 << 16) | (0x9900 >> 2),
754 	0x00000000,
755 	(0x0e00 << 16) | (0xc260 >> 2),
756 	0x00000000,
757 	(0x0e00 << 16) | (0x90e8 >> 2),
758 	0x00000000,
759 	(0x0e00 << 16) | (0x3c000 >> 2),
760 	0x00000000,
761 	(0x0e00 << 16) | (0x3c00c >> 2),
762 	0x00000000,
763 	(0x0e00 << 16) | (0x8c1c >> 2),
764 	0x00000000,
765 	(0x0e00 << 16) | (0x9700 >> 2),
766 	0x00000000,
767 	(0x0e00 << 16) | (0xcd20 >> 2),
768 	0x00000000,
769 	(0x4e00 << 16) | (0xcd20 >> 2),
770 	0x00000000,
771 	(0x5e00 << 16) | (0xcd20 >> 2),
772 	0x00000000,
773 	(0x6e00 << 16) | (0xcd20 >> 2),
774 	0x00000000,
775 	(0x7e00 << 16) | (0xcd20 >> 2),
776 	0x00000000,
777 	(0x0e00 << 16) | (0x89bc >> 2),
778 	0x00000000,
779 	(0x0e00 << 16) | (0x8900 >> 2),
780 	0x00000000,
781 	0x3,
782 	(0x0e00 << 16) | (0xc130 >> 2),
783 	0x00000000,
784 	(0x0e00 << 16) | (0xc134 >> 2),
785 	0x00000000,
786 	(0x0e00 << 16) | (0xc1fc >> 2),
787 	0x00000000,
788 	(0x0e00 << 16) | (0xc208 >> 2),
789 	0x00000000,
790 	(0x0e00 << 16) | (0xc264 >> 2),
791 	0x00000000,
792 	(0x0e00 << 16) | (0xc268 >> 2),
793 	0x00000000,
794 	(0x0e00 << 16) | (0xc26c >> 2),
795 	0x00000000,
796 	(0x0e00 << 16) | (0xc270 >> 2),
797 	0x00000000,
798 	(0x0e00 << 16) | (0xc274 >> 2),
799 	0x00000000,
800 	(0x0e00 << 16) | (0xc28c >> 2),
801 	0x00000000,
802 	(0x0e00 << 16) | (0xc290 >> 2),
803 	0x00000000,
804 	(0x0e00 << 16) | (0xc294 >> 2),
805 	0x00000000,
806 	(0x0e00 << 16) | (0xc298 >> 2),
807 	0x00000000,
808 	(0x0e00 << 16) | (0xc2a0 >> 2),
809 	0x00000000,
810 	(0x0e00 << 16) | (0xc2a4 >> 2),
811 	0x00000000,
812 	(0x0e00 << 16) | (0xc2a8 >> 2),
813 	0x00000000,
814 	(0x0e00 << 16) | (0xc2ac >> 2),
815 	0x00000000,
816 	(0x0e00 << 16) | (0x301d0 >> 2),
817 	0x00000000,
818 	(0x0e00 << 16) | (0x30238 >> 2),
819 	0x00000000,
820 	(0x0e00 << 16) | (0x30250 >> 2),
821 	0x00000000,
822 	(0x0e00 << 16) | (0x30254 >> 2),
823 	0x00000000,
824 	(0x0e00 << 16) | (0x30258 >> 2),
825 	0x00000000,
826 	(0x0e00 << 16) | (0x3025c >> 2),
827 	0x00000000,
828 	(0x4e00 << 16) | (0xc900 >> 2),
829 	0x00000000,
830 	(0x5e00 << 16) | (0xc900 >> 2),
831 	0x00000000,
832 	(0x6e00 << 16) | (0xc900 >> 2),
833 	0x00000000,
834 	(0x7e00 << 16) | (0xc900 >> 2),
835 	0x00000000,
836 	(0x4e00 << 16) | (0xc904 >> 2),
837 	0x00000000,
838 	(0x5e00 << 16) | (0xc904 >> 2),
839 	0x00000000,
840 	(0x6e00 << 16) | (0xc904 >> 2),
841 	0x00000000,
842 	(0x7e00 << 16) | (0xc904 >> 2),
843 	0x00000000,
844 	(0x4e00 << 16) | (0xc908 >> 2),
845 	0x00000000,
846 	(0x5e00 << 16) | (0xc908 >> 2),
847 	0x00000000,
848 	(0x6e00 << 16) | (0xc908 >> 2),
849 	0x00000000,
850 	(0x7e00 << 16) | (0xc908 >> 2),
851 	0x00000000,
852 	(0x4e00 << 16) | (0xc90c >> 2),
853 	0x00000000,
854 	(0x5e00 << 16) | (0xc90c >> 2),
855 	0x00000000,
856 	(0x6e00 << 16) | (0xc90c >> 2),
857 	0x00000000,
858 	(0x7e00 << 16) | (0xc90c >> 2),
859 	0x00000000,
860 	(0x4e00 << 16) | (0xc910 >> 2),
861 	0x00000000,
862 	(0x5e00 << 16) | (0xc910 >> 2),
863 	0x00000000,
864 	(0x6e00 << 16) | (0xc910 >> 2),
865 	0x00000000,
866 	(0x7e00 << 16) | (0xc910 >> 2),
867 	0x00000000,
868 	(0x0e00 << 16) | (0xc99c >> 2),
869 	0x00000000,
870 	(0x0e00 << 16) | (0x9834 >> 2),
871 	0x00000000,
872 	(0x0000 << 16) | (0x30f00 >> 2),
873 	0x00000000,
874 	(0x0000 << 16) | (0x30f04 >> 2),
875 	0x00000000,
876 	(0x0000 << 16) | (0x30f08 >> 2),
877 	0x00000000,
878 	(0x0000 << 16) | (0x30f0c >> 2),
879 	0x00000000,
880 	(0x0600 << 16) | (0x9b7c >> 2),
881 	0x00000000,
882 	(0x0e00 << 16) | (0x8a14 >> 2),
883 	0x00000000,
884 	(0x0e00 << 16) | (0x8a18 >> 2),
885 	0x00000000,
886 	(0x0600 << 16) | (0x30a00 >> 2),
887 	0x00000000,
888 	(0x0e00 << 16) | (0x8bf0 >> 2),
889 	0x00000000,
890 	(0x0e00 << 16) | (0x8bcc >> 2),
891 	0x00000000,
892 	(0x0e00 << 16) | (0x8b24 >> 2),
893 	0x00000000,
894 	(0x0e00 << 16) | (0x30a04 >> 2),
895 	0x00000000,
896 	(0x0600 << 16) | (0x30a10 >> 2),
897 	0x00000000,
898 	(0x0600 << 16) | (0x30a14 >> 2),
899 	0x00000000,
900 	(0x0600 << 16) | (0x30a18 >> 2),
901 	0x00000000,
902 	(0x0600 << 16) | (0x30a2c >> 2),
903 	0x00000000,
904 	(0x0e00 << 16) | (0xc700 >> 2),
905 	0x00000000,
906 	(0x0e00 << 16) | (0xc704 >> 2),
907 	0x00000000,
908 	(0x0e00 << 16) | (0xc708 >> 2),
909 	0x00000000,
910 	(0x0e00 << 16) | (0xc768 >> 2),
911 	0x00000000,
912 	(0x0400 << 16) | (0xc770 >> 2),
913 	0x00000000,
914 	(0x0400 << 16) | (0xc774 >> 2),
915 	0x00000000,
916 	(0x0400 << 16) | (0xc798 >> 2),
917 	0x00000000,
918 	(0x0400 << 16) | (0xc79c >> 2),
919 	0x00000000,
920 	(0x0e00 << 16) | (0x9100 >> 2),
921 	0x00000000,
922 	(0x0e00 << 16) | (0x3c010 >> 2),
923 	0x00000000,
924 	(0x0e00 << 16) | (0x8c00 >> 2),
925 	0x00000000,
926 	(0x0e00 << 16) | (0x8c04 >> 2),
927 	0x00000000,
928 	(0x0e00 << 16) | (0x8c20 >> 2),
929 	0x00000000,
930 	(0x0e00 << 16) | (0x8c38 >> 2),
931 	0x00000000,
932 	(0x0e00 << 16) | (0x8c3c >> 2),
933 	0x00000000,
934 	(0x0e00 << 16) | (0xae00 >> 2),
935 	0x00000000,
936 	(0x0e00 << 16) | (0x9604 >> 2),
937 	0x00000000,
938 	(0x0e00 << 16) | (0xac08 >> 2),
939 	0x00000000,
940 	(0x0e00 << 16) | (0xac0c >> 2),
941 	0x00000000,
942 	(0x0e00 << 16) | (0xac10 >> 2),
943 	0x00000000,
944 	(0x0e00 << 16) | (0xac14 >> 2),
945 	0x00000000,
946 	(0x0e00 << 16) | (0xac58 >> 2),
947 	0x00000000,
948 	(0x0e00 << 16) | (0xac68 >> 2),
949 	0x00000000,
950 	(0x0e00 << 16) | (0xac6c >> 2),
951 	0x00000000,
952 	(0x0e00 << 16) | (0xac70 >> 2),
953 	0x00000000,
954 	(0x0e00 << 16) | (0xac74 >> 2),
955 	0x00000000,
956 	(0x0e00 << 16) | (0xac78 >> 2),
957 	0x00000000,
958 	(0x0e00 << 16) | (0xac7c >> 2),
959 	0x00000000,
960 	(0x0e00 << 16) | (0xac80 >> 2),
961 	0x00000000,
962 	(0x0e00 << 16) | (0xac84 >> 2),
963 	0x00000000,
964 	(0x0e00 << 16) | (0xac88 >> 2),
965 	0x00000000,
966 	(0x0e00 << 16) | (0xac8c >> 2),
967 	0x00000000,
968 	(0x0e00 << 16) | (0x970c >> 2),
969 	0x00000000,
970 	(0x0e00 << 16) | (0x9714 >> 2),
971 	0x00000000,
972 	(0x0e00 << 16) | (0x9718 >> 2),
973 	0x00000000,
974 	(0x0e00 << 16) | (0x971c >> 2),
975 	0x00000000,
976 	(0x0e00 << 16) | (0x31068 >> 2),
977 	0x00000000,
978 	(0x4e00 << 16) | (0x31068 >> 2),
979 	0x00000000,
980 	(0x5e00 << 16) | (0x31068 >> 2),
981 	0x00000000,
982 	(0x6e00 << 16) | (0x31068 >> 2),
983 	0x00000000,
984 	(0x7e00 << 16) | (0x31068 >> 2),
985 	0x00000000,
986 	(0x0e00 << 16) | (0xcd10 >> 2),
987 	0x00000000,
988 	(0x0e00 << 16) | (0xcd14 >> 2),
989 	0x00000000,
990 	(0x0e00 << 16) | (0x88b0 >> 2),
991 	0x00000000,
992 	(0x0e00 << 16) | (0x88b4 >> 2),
993 	0x00000000,
994 	(0x0e00 << 16) | (0x88b8 >> 2),
995 	0x00000000,
996 	(0x0e00 << 16) | (0x88bc >> 2),
997 	0x00000000,
998 	(0x0400 << 16) | (0x89c0 >> 2),
999 	0x00000000,
1000 	(0x0e00 << 16) | (0x88c4 >> 2),
1001 	0x00000000,
1002 	(0x0e00 << 16) | (0x88c8 >> 2),
1003 	0x00000000,
1004 	(0x0e00 << 16) | (0x88d0 >> 2),
1005 	0x00000000,
1006 	(0x0e00 << 16) | (0x88d4 >> 2),
1007 	0x00000000,
1008 	(0x0e00 << 16) | (0x88d8 >> 2),
1009 	0x00000000,
1010 	(0x0e00 << 16) | (0x8980 >> 2),
1011 	0x00000000,
1012 	(0x0e00 << 16) | (0x30938 >> 2),
1013 	0x00000000,
1014 	(0x0e00 << 16) | (0x3093c >> 2),
1015 	0x00000000,
1016 	(0x0e00 << 16) | (0x30940 >> 2),
1017 	0x00000000,
1018 	(0x0e00 << 16) | (0x89a0 >> 2),
1019 	0x00000000,
1020 	(0x0e00 << 16) | (0x30900 >> 2),
1021 	0x00000000,
1022 	(0x0e00 << 16) | (0x30904 >> 2),
1023 	0x00000000,
1024 	(0x0e00 << 16) | (0x89b4 >> 2),
1025 	0x00000000,
1026 	(0x0e00 << 16) | (0x3e1fc >> 2),
1027 	0x00000000,
1028 	(0x0e00 << 16) | (0x3c210 >> 2),
1029 	0x00000000,
1030 	(0x0e00 << 16) | (0x3c214 >> 2),
1031 	0x00000000,
1032 	(0x0e00 << 16) | (0x3c218 >> 2),
1033 	0x00000000,
1034 	(0x0e00 << 16) | (0x8904 >> 2),
1035 	0x00000000,
1036 	0x5,
1037 	(0x0e00 << 16) | (0x8c28 >> 2),
1038 	(0x0e00 << 16) | (0x8c2c >> 2),
1039 	(0x0e00 << 16) | (0x8c30 >> 2),
1040 	(0x0e00 << 16) | (0x8c34 >> 2),
1041 	(0x0e00 << 16) | (0x9600 >> 2),
1042 };
1043 
/*
 * bonaire_golden_spm_registers - a single triple of u32 words.
 * NOTE(review): presumably { register offset, mask, value } as consumed by
 * a register-sequence programming helper; the consumer is not visible in
 * this part of the file - confirm the field order there.
 */
1044 static const u32 bonaire_golden_spm_registers[] =
1045 {
1046 	0x30800, 0xe0ffffff, 0xe0000000
1047 };
1048 
/*
 * bonaire_golden_common_registers - four triples of u32 words, one per line.
 * NOTE(review): presumably { register offset, mask, value }; the middle word
 * is 0xffffffff in every row.  The consumer is not visible in this part of
 * the file - confirm the field order there.
 */
1049 static const u32 bonaire_golden_common_registers[] =
1050 {
1051 	0xc770, 0xffffffff, 0x00000800,
1052 	0xc774, 0xffffffff, 0x00000800,
1053 	0xc798, 0xffffffff, 0x00007fbf,
1054 	0xc79c, 0xffffffff, 0x00007faf
1055 };
1056 
/*
 * bonaire_golden_registers - table of u32 triples, one triple per line.
 * NOTE(review): presumably { register offset, mask, value } "golden"
 * settings for Bonaire; the consumer is not visible in this part of the
 * file - confirm the field order and how the mask is applied there.
 */
1057 static const u32 bonaire_golden_registers[] =
1058 {
1059 	0x3354, 0x00000333, 0x00000333,
1060 	0x3350, 0x000c0fc0, 0x00040200,
1061 	0x9a10, 0x00010000, 0x00058208,
1062 	0x3c000, 0xffff1fff, 0x00140000,
1063 	0x3c200, 0xfdfc0fff, 0x00000100,
1064 	0x3c234, 0x40000000, 0x40000200,
1065 	0x9830, 0xffffffff, 0x00000000,
1066 	0x9834, 0xf00fffff, 0x00000400,
1067 	0x9838, 0x0002021c, 0x00020200,
1068 	0xc78, 0x00000080, 0x00000000,
1069 	0x5bb0, 0x000000f0, 0x00000070,
1070 	0x5bc0, 0xf0311fff, 0x80300000,
1071 	0x98f8, 0x73773777, 0x12010001,
1072 	0x350c, 0x00810000, 0x408af000,
1073 	0x7030, 0x31000111, 0x00000011,
1074 	0x2f48, 0x73773777, 0x12010001,
1075 	0x220c, 0x00007fb6, 0x0021a1b1,
1076 	0x2210, 0x00007fb6, 0x002021b1,
1077 	0x2180, 0x00007fb6, 0x00002191,
1078 	0x2218, 0x00007fb6, 0x002121b1,
1079 	0x221c, 0x00007fb6, 0x002021b1,
1080 	0x21dc, 0x00007fb6, 0x00002191,
1081 	0x21e0, 0x00007fb6, 0x00002191,
1082 	0x3628, 0x0000003f, 0x0000000a,
1083 	0x362c, 0x0000003f, 0x0000000a,
1084 	0x2ae4, 0x00073ffe, 0x000022a2,
1085 	0x240c, 0x000007ff, 0x00000000,
1086 	0x8a14, 0xf000003f, 0x00000007,
1087 	0x8bf0, 0x00002001, 0x00000001,
1088 	0x8b24, 0xffffffff, 0x00ffffff,
1089 	0x30a04, 0x0000ff0f, 0x00000000,
1090 	0x28a4c, 0x07ffffff, 0x06000000,
1091 	0x4d8, 0x00000fff, 0x00000100,
1092 	0x3e78, 0x00000001, 0x00000002,
1093 	0x9100, 0x03000000, 0x0362c688,
1094 	0x8c00, 0x000000ff, 0x00000001,
1095 	0xe40, 0x00001fff, 0x00001fff,
1096 	0x9060, 0x0000007f, 0x00000020,
1097 	0x9508, 0x00010000, 0x00010000,
1098 	0xac14, 0x000003ff, 0x000000f3,
1099 	0xac0c, 0xffffffff, 0x00001032
1100 };
1101 
/*
 * Bonaire mgcg/cgcg init sequence; the first table programmed for
 * CHIP_BONAIRE by cik_init_golden_registers().
 * NOTE(review): presumably clock-gating defaults laid out as
 * (register offset, mask, value) triples - confirm against
 * radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1187 
/*
 * Spectre "spm" golden setting.  cik_init_golden_registers() programs it
 * as the last sequence for CHIP_KAVERI.
 * NOTE(review): entries look like (register offset, mask, value) triples -
 * confirm against radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1192 
/*
 * Spectre "common" golden settings, programmed for CHIP_KAVERI by
 * cik_init_golden_registers() after spectre_golden_registers.
 * NOTE(review): entries look like (register offset, mask, value) triples -
 * confirm against radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1200 
/*
 * Spectre golden register settings, programmed for CHIP_KAVERI by
 * cik_init_golden_registers() right after spectre_mgcg_cgcg_init.
 * NOTE(review): entries look like (register offset, mask, value) triples -
 * confirm against radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] = {
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001,
};
1229 
/*
 * Spectre mgcg/cgcg init sequence; the first table programmed for
 * CHIP_KAVERI by cik_init_golden_registers().
 * NOTE(review): presumably clock-gating defaults laid out as
 * (register offset, mask, value) triples - confirm against
 * radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1320 
/*
 * Kalindi "spm" golden setting.  cik_init_golden_registers() programs it
 * as the last sequence for both CHIP_KABINI and CHIP_MULLINS.
 * NOTE(review): entries look like (register offset, mask, value) triples -
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1325 
/*
 * Kalindi "common" golden settings, programmed by
 * cik_init_golden_registers() for both CHIP_KABINI and CHIP_MULLINS.
 * NOTE(review): entries look like (register offset, mask, value) triples -
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1333 
/*
 * Kalindi golden register settings, programmed for CHIP_KABINI by
 * cik_init_golden_registers() right after kalindi_mgcg_cgcg_init.
 * NOTE(review): entries look like (register offset, mask, value) triples -
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] = {
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000,
};
1367 
/*
 * Kalindi mgcg/cgcg init sequence; the first table programmed by
 * cik_init_golden_registers() for both CHIP_KABINI and CHIP_MULLINS.
 * NOTE(review): presumably clock-gating defaults laid out as
 * (register offset, mask, value) triples - confirm against
 * radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1426 
/*
 * Hawaii "spm" golden setting.  cik_init_golden_registers() programs it
 * as the last sequence for CHIP_HAWAII.
 * NOTE(review): entries look like (register offset, mask, value) triples -
 * confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1431 
/*
 * Hawaii "common" golden settings, programmed for CHIP_HAWAII by
 * cik_init_golden_registers() after hawaii_golden_registers.
 * NOTE(review): entries look like (register offset, mask, value) triples -
 * confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] = {
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003,
};
1440 
/*
 * Hawaii golden register settings, programmed for CHIP_HAWAII by
 * cik_init_golden_registers() right after hawaii_mgcg_cgcg_init.
 * NOTE(review): entries look like (register offset, mask, value) triples -
 * confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] = {
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000,
};
1480 
/*
 * Hawaii mgcg/cgcg init sequence; the first table programmed for
 * CHIP_HAWAII by cik_init_golden_registers().
 * NOTE(review): presumably clock-gating defaults laid out as
 * (register offset, mask, value) triples - confirm against
 * radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1591 
/*
 * Godavari golden register settings, programmed for CHIP_MULLINS by
 * cik_init_golden_registers() right after kalindi_mgcg_cgcg_init.
 * NOTE(review): entries look like (register offset, mask, value) triples -
 * confirm against radeon_program_register_sequence().
 */
static const u32 godavari_golden_registers[] = {
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/*
	 * Was 0x98302, which is not dword aligned.  kalindi_golden_registers
	 * and the other golden tables in this file carry this exact
	 * mask/value pair at 0x9834, directly after the 0x9830 entry.
	 */
	0x9834, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000,
};
1627 
1628 
1629 static void cik_init_golden_registers(struct radeon_device *rdev)
1630 {
1631 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1632 	mutex_lock(&rdev->grbm_idx_mutex);
1633 	switch (rdev->family) {
1634 	case CHIP_BONAIRE:
1635 		radeon_program_register_sequence(rdev,
1636 						 bonaire_mgcg_cgcg_init,
1637 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1638 		radeon_program_register_sequence(rdev,
1639 						 bonaire_golden_registers,
1640 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1641 		radeon_program_register_sequence(rdev,
1642 						 bonaire_golden_common_registers,
1643 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1644 		radeon_program_register_sequence(rdev,
1645 						 bonaire_golden_spm_registers,
1646 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1647 		break;
1648 	case CHIP_KABINI:
1649 		radeon_program_register_sequence(rdev,
1650 						 kalindi_mgcg_cgcg_init,
1651 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1652 		radeon_program_register_sequence(rdev,
1653 						 kalindi_golden_registers,
1654 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1655 		radeon_program_register_sequence(rdev,
1656 						 kalindi_golden_common_registers,
1657 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1658 		radeon_program_register_sequence(rdev,
1659 						 kalindi_golden_spm_registers,
1660 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1661 		break;
1662 	case CHIP_MULLINS:
1663 		radeon_program_register_sequence(rdev,
1664 						 kalindi_mgcg_cgcg_init,
1665 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1666 		radeon_program_register_sequence(rdev,
1667 						 godavari_golden_registers,
1668 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1669 		radeon_program_register_sequence(rdev,
1670 						 kalindi_golden_common_registers,
1671 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1672 		radeon_program_register_sequence(rdev,
1673 						 kalindi_golden_spm_registers,
1674 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1675 		break;
1676 	case CHIP_KAVERI:
1677 		radeon_program_register_sequence(rdev,
1678 						 spectre_mgcg_cgcg_init,
1679 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1680 		radeon_program_register_sequence(rdev,
1681 						 spectre_golden_registers,
1682 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1683 		radeon_program_register_sequence(rdev,
1684 						 spectre_golden_common_registers,
1685 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1686 		radeon_program_register_sequence(rdev,
1687 						 spectre_golden_spm_registers,
1688 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1689 		break;
1690 	case CHIP_HAWAII:
1691 		radeon_program_register_sequence(rdev,
1692 						 hawaii_mgcg_cgcg_init,
1693 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1694 		radeon_program_register_sequence(rdev,
1695 						 hawaii_golden_registers,
1696 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1697 		radeon_program_register_sequence(rdev,
1698 						 hawaii_golden_common_registers,
1699 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1700 		radeon_program_register_sequence(rdev,
1701 						 hawaii_golden_spm_registers,
1702 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1703 		break;
1704 	default:
1705 		break;
1706 	}
1707 	mutex_unlock(&rdev->grbm_idx_mutex);
1708 }
1709 
1710 /**
1711  * cik_get_xclk - get the xclk
1712  *
1713  * @rdev: radeon_device pointer
1714  *
1715  * Returns the reference clock used by the gfx engine
1716  * (CIK).
1717  */
1718 u32 cik_get_xclk(struct radeon_device *rdev)
1719 {
1720 	u32 reference_clock = rdev->clock.spll.reference_freq;
1721 
1722 	if (rdev->flags & RADEON_IS_IGP) {
1723 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1724 			return reference_clock / 2;
1725 	} else {
1726 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1727 			return reference_clock / 4;
1728 	}
1729 	return reference_clock;
1730 }
1731 
1732 /**
1733  * cik_mm_rdoorbell - read a doorbell dword
1734  *
1735  * @rdev: radeon_device pointer
1736  * @index: doorbell index
1737  *
1738  * Returns the value in the doorbell aperture at the
1739  * requested doorbell index (CIK).
1740  */
1741 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1742 {
1743 	if (index < rdev->doorbell.num_doorbells) {
1744 		return readl(rdev->doorbell.ptr + index);
1745 	} else {
1746 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1747 		return 0;
1748 	}
1749 }
1750 
1751 /**
1752  * cik_mm_wdoorbell - write a doorbell dword
1753  *
1754  * @rdev: radeon_device pointer
1755  * @index: doorbell index
1756  * @v: value to write
1757  *
1758  * Writes @v to the doorbell aperture at the
1759  * requested doorbell index (CIK).
1760  */
1761 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1762 {
1763 	if (index < rdev->doorbell.num_doorbells) {
1764 		writel(v, rdev->doorbell.ptr + index);
1765 	} else {
1766 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1767 	}
1768 }
1769 
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * Built-in MC IO register table for Bonaire.  ci_mc_load_microcode() uses
 * it for CHIP_BONAIRE when the firmware is not in the new format: each
 * pair is written as { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA }.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] = {
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000},
};
1811 
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * Built-in MC IO register table for Hawaii.  ci_mc_load_microcode() uses
 * it for CHIP_HAWAII when the firmware is not in the new format: each
 * pair is written as { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA }.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] = {
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000},
};
1839 
1840 
1841 /**
1842  * cik_srbm_select - select specific register instances
1843  *
1844  * @rdev: radeon_device pointer
1845  * @me: selected ME (micro engine)
1846  * @pipe: pipe
1847  * @queue: queue
1848  * @vmid: VMID
1849  *
1850  * Switches the currently active registers instances.  Some
1851  * registers are instanced per VMID, others are instanced per
1852  * me/pipe/queue combination.
1853  */
1854 static void cik_srbm_select(struct radeon_device *rdev,
1855 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1856 {
1857 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1858 			     MEID(me & 0x3) |
1859 			     VMID(vmid & 0xf) |
1860 			     QUEUEID(queue & 0x7));
1861 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1862 }
1863 
1864 /* ucode loading */
1865 /**
1866  * ci_mc_load_microcode - load MC ucode into the hw
1867  *
1868  * @rdev: radeon_device pointer
1869  *
1870  * Load the GDDR MC ucode into the hw (CIK).
1871  * Returns 0 on success, error on failure.
1872  */
1873 int ci_mc_load_microcode(struct radeon_device *rdev)
1874 {
1875 	const __be32 *fw_data = NULL;
1876 	const __le32 *new_fw_data = NULL;
1877 	u32 running, tmp;
1878 	u32 *io_mc_regs = NULL;
1879 	const __le32 *new_io_mc_regs = NULL;
1880 	int i, regs_size, ucode_size;
1881 
1882 	if (!rdev->mc_fw)
1883 		return -EINVAL;
1884 
1885 	if (rdev->new_fw) {
1886 		const struct mc_firmware_header_v1_0 *hdr =
1887 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1888 
1889 		radeon_ucode_print_mc_hdr(&hdr->header);
1890 
1891 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1892 		new_io_mc_regs = (const __le32 *)
1893 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1894 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1895 		new_fw_data = (const __le32 *)
1896 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1897 	} else {
1898 		ucode_size = rdev->mc_fw->size / 4;
1899 
1900 		switch (rdev->family) {
1901 		case CHIP_BONAIRE:
1902 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1903 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1904 			break;
1905 		case CHIP_HAWAII:
1906 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1907 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1908 			break;
1909 		default:
1910 			return -EINVAL;
1911 		}
1912 		fw_data = (const __be32 *)rdev->mc_fw->data;
1913 	}
1914 
1915 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1916 
1917 	if (running == 0) {
1918 		/* reset the engine and set to writable */
1919 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921 
1922 		/* load mc io regs */
1923 		for (i = 0; i < regs_size; i++) {
1924 			if (rdev->new_fw) {
1925 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927 			} else {
1928 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930 			}
1931 		}
1932 
1933 		tmp = RREG32(MC_SEQ_MISC0);
1934 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939 		}
1940 
1941 		/* load the MC ucode */
1942 		for (i = 0; i < ucode_size; i++) {
1943 			if (rdev->new_fw)
1944 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945 			else
1946 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947 		}
1948 
1949 		/* put the engine back into the active state */
1950 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953 
1954 		/* wait for training to complete */
1955 		for (i = 0; i < rdev->usec_timeout; i++) {
1956 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957 				break;
1958 			udelay(1);
1959 		}
1960 		for (i = 0; i < rdev->usec_timeout; i++) {
1961 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962 				break;
1963 			udelay(1);
1964 		}
1965 	}
1966 
1967 	return 0;
1968 }
1969 
1970 /**
1971  * cik_init_microcode - load ucode images from disk
1972  *
1973  * @rdev: radeon_device pointer
1974  *
1975  * Use the firmware interface to load the ucode images into
1976  * the driver (not loaded into hw).
1977  * Returns 0 on success, error on failure.
1978  */
1979 static int cik_init_microcode(struct radeon_device *rdev)
1980 {
1981 	const char *chip_name;
1982 	const char *new_chip_name;
1983 	size_t pfp_req_size, me_req_size, ce_req_size,
1984 		mec_req_size, rlc_req_size, mc_req_size = 0,
1985 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1986 	char fw_name[30];
1987 	int new_fw = 0;
1988 	int err;
1989 	int num_fw;
1990 	bool new_smc = false;
1991 
1992 	DRM_DEBUG("\n");
1993 
1994 	switch (rdev->family) {
1995 	case CHIP_BONAIRE:
1996 		chip_name = "BONAIRE";
1997 		if ((rdev->pdev->revision == 0x80) ||
1998 		    (rdev->pdev->revision == 0x81) ||
1999 		    (rdev->pdev->device == 0x665f))
2000 			new_smc = true;
2001 		new_chip_name = "bonaire";
2002 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2003 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2004 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2005 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2006 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2007 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2008 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2009 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2010 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2011 		num_fw = 8;
2012 		break;
2013 	case CHIP_HAWAII:
2014 		chip_name = "HAWAII";
2015 		if (rdev->pdev->revision == 0x80)
2016 			new_smc = true;
2017 		new_chip_name = "hawaii";
2018 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2019 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2020 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2021 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2022 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2023 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2024 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2025 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2026 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2027 		num_fw = 8;
2028 		break;
2029 	case CHIP_KAVERI:
2030 		chip_name = "KAVERI";
2031 		new_chip_name = "kaveri";
2032 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2033 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2034 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2035 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2036 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2037 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2038 		num_fw = 7;
2039 		break;
2040 	case CHIP_KABINI:
2041 		chip_name = "KABINI";
2042 		new_chip_name = "kabini";
2043 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2044 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2045 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2046 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2047 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2048 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2049 		num_fw = 6;
2050 		break;
2051 	case CHIP_MULLINS:
2052 		chip_name = "MULLINS";
2053 		new_chip_name = "mullins";
2054 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2055 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2056 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2057 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2058 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2059 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2060 		num_fw = 6;
2061 		break;
2062 	default: BUG();
2063 	}
2064 
2065 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2066 
2067 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2068 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069 	if (err) {
2070 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2071 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2072 		if (err)
2073 			goto out;
2074 		if (rdev->pfp_fw->size != pfp_req_size) {
2075 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2076 			       rdev->pfp_fw->size, fw_name);
2077 			err = -EINVAL;
2078 			goto out;
2079 		}
2080 	} else {
2081 		err = radeon_ucode_validate(rdev->pfp_fw);
2082 		if (err) {
2083 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2084 			       fw_name);
2085 			goto out;
2086 		} else {
2087 			new_fw++;
2088 		}
2089 	}
2090 
2091 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2092 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2093 	if (err) {
2094 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2095 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2096 		if (err)
2097 			goto out;
2098 		if (rdev->me_fw->size != me_req_size) {
2099 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100 			       rdev->me_fw->size, fw_name);
2101 			err = -EINVAL;
2102 		}
2103 	} else {
2104 		err = radeon_ucode_validate(rdev->me_fw);
2105 		if (err) {
2106 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->ce_fw->size != ce_req_size) {
2122 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2123 			       rdev->ce_fw->size, fw_name);
2124 			err = -EINVAL;
2125 		}
2126 	} else {
2127 		err = radeon_ucode_validate(rdev->ce_fw);
2128 		if (err) {
2129 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2130 			       fw_name);
2131 			goto out;
2132 		} else {
2133 			new_fw++;
2134 		}
2135 	}
2136 
2137 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2138 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2139 	if (err) {
2140 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2141 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142 		if (err)
2143 			goto out;
2144 		if (rdev->mec_fw->size != mec_req_size) {
2145 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2146 			       rdev->mec_fw->size, fw_name);
2147 			err = -EINVAL;
2148 		}
2149 	} else {
2150 		err = radeon_ucode_validate(rdev->mec_fw);
2151 		if (err) {
2152 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2153 			       fw_name);
2154 			goto out;
2155 		} else {
2156 			new_fw++;
2157 		}
2158 	}
2159 
2160 	if (rdev->family == CHIP_KAVERI) {
2161 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2162 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2163 		if (err) {
2164 			goto out;
2165 		} else {
2166 			err = radeon_ucode_validate(rdev->mec2_fw);
2167 			if (err) {
2168 				goto out;
2169 			} else {
2170 				new_fw++;
2171 			}
2172 		}
2173 	}
2174 
2175 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2176 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2177 	if (err) {
2178 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2179 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2180 		if (err)
2181 			goto out;
2182 		if (rdev->rlc_fw->size != rlc_req_size) {
2183 			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2184 			       rdev->rlc_fw->size, fw_name);
2185 			err = -EINVAL;
2186 		}
2187 	} else {
2188 		err = radeon_ucode_validate(rdev->rlc_fw);
2189 		if (err) {
2190 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2191 			       fw_name);
2192 			goto out;
2193 		} else {
2194 			new_fw++;
2195 		}
2196 	}
2197 
2198 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2199 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2200 	if (err) {
2201 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2202 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2203 		if (err)
2204 			goto out;
2205 		if (rdev->sdma_fw->size != sdma_req_size) {
2206 			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2207 			       rdev->sdma_fw->size, fw_name);
2208 			err = -EINVAL;
2209 		}
2210 	} else {
2211 		err = radeon_ucode_validate(rdev->sdma_fw);
2212 		if (err) {
2213 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2214 			       fw_name);
2215 			goto out;
2216 		} else {
2217 			new_fw++;
2218 		}
2219 	}
2220 
2221 	/* No SMC, MC ucode on APUs */
2222 	if (!(rdev->flags & RADEON_IS_IGP)) {
2223 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2224 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2225 		if (err) {
2226 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2227 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2228 			if (err) {
2229 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2230 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2231 				if (err)
2232 					goto out;
2233 			}
2234 			if ((rdev->mc_fw->size != mc_req_size) &&
2235 			    (rdev->mc_fw->size != mc2_req_size)){
2236 				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2237 				       rdev->mc_fw->size, fw_name);
2238 				err = -EINVAL;
2239 			}
2240 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2241 		} else {
2242 			err = radeon_ucode_validate(rdev->mc_fw);
2243 			if (err) {
2244 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2245 				       fw_name);
2246 				goto out;
2247 			} else {
2248 				new_fw++;
2249 			}
2250 		}
2251 
2252 		if (new_smc)
2253 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2254 		else
2255 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2256 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2257 		if (err) {
2258 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2259 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2260 			if (err) {
2261 				pr_err("smc: error loading firmware \"%s\"\n",
2262 				       fw_name);
2263 				release_firmware(rdev->smc_fw);
2264 				rdev->smc_fw = NULL;
2265 				err = 0;
2266 			} else if (rdev->smc_fw->size != smc_req_size) {
2267 				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2268 				       rdev->smc_fw->size, fw_name);
2269 				err = -EINVAL;
2270 			}
2271 		} else {
2272 			err = radeon_ucode_validate(rdev->smc_fw);
2273 			if (err) {
2274 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2275 				       fw_name);
2276 				goto out;
2277 			} else {
2278 				new_fw++;
2279 			}
2280 		}
2281 	}
2282 
2283 	if (new_fw == 0) {
2284 		rdev->new_fw = false;
2285 	} else if (new_fw < num_fw) {
2286 		pr_err("ci_fw: mixing new and old firmware!\n");
2287 		err = -EINVAL;
2288 	} else {
2289 		rdev->new_fw = true;
2290 	}
2291 
2292 out:
2293 	if (err) {
2294 		if (err != -EINVAL)
2295 			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2296 			       fw_name);
2297 		release_firmware(rdev->pfp_fw);
2298 		rdev->pfp_fw = NULL;
2299 		release_firmware(rdev->me_fw);
2300 		rdev->me_fw = NULL;
2301 		release_firmware(rdev->ce_fw);
2302 		rdev->ce_fw = NULL;
2303 		release_firmware(rdev->mec_fw);
2304 		rdev->mec_fw = NULL;
2305 		release_firmware(rdev->mec2_fw);
2306 		rdev->mec2_fw = NULL;
2307 		release_firmware(rdev->rlc_fw);
2308 		rdev->rlc_fw = NULL;
2309 		release_firmware(rdev->sdma_fw);
2310 		rdev->sdma_fw = NULL;
2311 		release_firmware(rdev->mc_fw);
2312 		rdev->mc_fw = NULL;
2313 		release_firmware(rdev->smc_fw);
2314 		rdev->smc_fw = NULL;
2315 	}
2316 	return err;
2317 }
2318 
2319 /*
2320  * Core functions
2321  */
2322 /**
2323  * cik_tiling_mode_table_init - init the hw tiling table
2324  *
2325  * @rdev: radeon_device pointer
2326  *
2327  * Starting with SI, the tiling setup is done globally in a
2328  * set of 32 tiling modes.  Rather than selecting each set of
2329  * parameters per surface as on older asics, we just select
2330  * which index in the tiling table we want to use, and the
2331  * surface uses those parameters (CIK).
2332  */
2333 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2334 {
2335 	u32 *tile = rdev->config.cik.tile_mode_array;
2336 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2337 	const u32 num_tile_mode_states =
2338 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2339 	const u32 num_secondary_tile_mode_states =
2340 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2341 	u32 reg_offset, split_equal_to_row_size;
2342 	u32 num_pipe_configs;
2343 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2344 		rdev->config.cik.max_shader_engines;
2345 
2346 	switch (rdev->config.cik.mem_row_size_in_kb) {
2347 	case 1:
2348 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2349 		break;
2350 	case 2:
2351 	default:
2352 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2353 		break;
2354 	case 4:
2355 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2356 		break;
2357 	}
2358 
2359 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2360 	if (num_pipe_configs > 8)
2361 		num_pipe_configs = 16;
2362 
2363 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2364 		tile[reg_offset] = 0;
2365 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2366 		macrotile[reg_offset] = 0;
2367 
2368 	switch(num_pipe_configs) {
2369 	case 16:
2370 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2374 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2378 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2382 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2386 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 			   TILE_SPLIT(split_equal_to_row_size));
2390 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2398 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 			   TILE_SPLIT(split_equal_to_row_size));
2401 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2402 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2403 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2406 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2421 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2436 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448 
2449 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 			   NUM_BANKS(ADDR_SURF_16_BANK));
2453 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456 			   NUM_BANKS(ADDR_SURF_16_BANK));
2457 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460 			   NUM_BANKS(ADDR_SURF_16_BANK));
2461 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 			   NUM_BANKS(ADDR_SURF_16_BANK));
2465 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 			   NUM_BANKS(ADDR_SURF_8_BANK));
2469 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472 			   NUM_BANKS(ADDR_SURF_4_BANK));
2473 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476 			   NUM_BANKS(ADDR_SURF_2_BANK));
2477 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480 			   NUM_BANKS(ADDR_SURF_16_BANK));
2481 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484 			   NUM_BANKS(ADDR_SURF_16_BANK));
2485 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 			    NUM_BANKS(ADDR_SURF_16_BANK));
2489 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492 			    NUM_BANKS(ADDR_SURF_8_BANK));
2493 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496 			    NUM_BANKS(ADDR_SURF_4_BANK));
2497 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 			    NUM_BANKS(ADDR_SURF_2_BANK));
2501 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504 			    NUM_BANKS(ADDR_SURF_2_BANK));
2505 
2506 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2508 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2509 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2510 		break;
2511 
2512 	case 8:
2513 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2517 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2521 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2525 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2529 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532 			   TILE_SPLIT(split_equal_to_row_size));
2533 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2541 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 			   TILE_SPLIT(split_equal_to_row_size));
2544 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2549 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2558 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2564 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2579 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591 
2592 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2594 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2595 				NUM_BANKS(ADDR_SURF_16_BANK));
2596 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599 				NUM_BANKS(ADDR_SURF_16_BANK));
2600 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603 				NUM_BANKS(ADDR_SURF_16_BANK));
2604 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2607 				NUM_BANKS(ADDR_SURF_16_BANK));
2608 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611 				NUM_BANKS(ADDR_SURF_8_BANK));
2612 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615 				NUM_BANKS(ADDR_SURF_4_BANK));
2616 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619 				NUM_BANKS(ADDR_SURF_2_BANK));
2620 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2622 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623 				NUM_BANKS(ADDR_SURF_16_BANK));
2624 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2627 				NUM_BANKS(ADDR_SURF_16_BANK));
2628 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631 				NUM_BANKS(ADDR_SURF_16_BANK));
2632 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2635 				NUM_BANKS(ADDR_SURF_16_BANK));
2636 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639 				NUM_BANKS(ADDR_SURF_8_BANK));
2640 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643 				NUM_BANKS(ADDR_SURF_4_BANK));
2644 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2647 				NUM_BANKS(ADDR_SURF_2_BANK));
2648 
2649 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2650 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2651 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2652 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2653 		break;
2654 
2655 	case 4:
2656 		if (num_rbs == 4) {
2657 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2661 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2665 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2673 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2675 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676 			   TILE_SPLIT(split_equal_to_row_size));
2677 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2685 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 			   TILE_SPLIT(split_equal_to_row_size));
2688 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2693 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2703 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2708 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2723 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2724 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735 
2736 		} else if (num_rbs < 4) {
2737 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2741 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2745 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2749 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2753 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2755 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756 			   TILE_SPLIT(split_equal_to_row_size));
2757 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2760 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2765 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 			   TILE_SPLIT(split_equal_to_row_size));
2768 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2769 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2770 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2773 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2782 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2788 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2803 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 		}
2816 
2817 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820 				NUM_BANKS(ADDR_SURF_16_BANK));
2821 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824 				NUM_BANKS(ADDR_SURF_16_BANK));
2825 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828 				NUM_BANKS(ADDR_SURF_16_BANK));
2829 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832 				NUM_BANKS(ADDR_SURF_16_BANK));
2833 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836 				NUM_BANKS(ADDR_SURF_16_BANK));
2837 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840 				NUM_BANKS(ADDR_SURF_8_BANK));
2841 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844 				NUM_BANKS(ADDR_SURF_4_BANK));
2845 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2847 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848 				NUM_BANKS(ADDR_SURF_16_BANK));
2849 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2850 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852 				NUM_BANKS(ADDR_SURF_16_BANK));
2853 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856 				NUM_BANKS(ADDR_SURF_16_BANK));
2857 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860 				NUM_BANKS(ADDR_SURF_16_BANK));
2861 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864 				NUM_BANKS(ADDR_SURF_16_BANK));
2865 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2868 				NUM_BANKS(ADDR_SURF_8_BANK));
2869 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2872 				NUM_BANKS(ADDR_SURF_4_BANK));
2873 
2874 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2875 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2876 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2877 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2878 		break;
2879 
2880 	case 2:
2881 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883 			   PIPE_CONFIG(ADDR_SURF_P2) |
2884 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2885 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887 			   PIPE_CONFIG(ADDR_SURF_P2) |
2888 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891 			   PIPE_CONFIG(ADDR_SURF_P2) |
2892 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895 			   PIPE_CONFIG(ADDR_SURF_P2) |
2896 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2897 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899 			   PIPE_CONFIG(ADDR_SURF_P2) |
2900 			   TILE_SPLIT(split_equal_to_row_size));
2901 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902 			   PIPE_CONFIG(ADDR_SURF_P2) |
2903 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 			   PIPE_CONFIG(ADDR_SURF_P2) |
2907 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2909 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910 			   PIPE_CONFIG(ADDR_SURF_P2) |
2911 			   TILE_SPLIT(split_equal_to_row_size));
2912 		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2913 			   PIPE_CONFIG(ADDR_SURF_P2);
2914 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916 			   PIPE_CONFIG(ADDR_SURF_P2));
2917 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919 			    PIPE_CONFIG(ADDR_SURF_P2) |
2920 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923 			    PIPE_CONFIG(ADDR_SURF_P2) |
2924 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927 			    PIPE_CONFIG(ADDR_SURF_P2) |
2928 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930 			    PIPE_CONFIG(ADDR_SURF_P2) |
2931 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2932 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934 			    PIPE_CONFIG(ADDR_SURF_P2) |
2935 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938 			    PIPE_CONFIG(ADDR_SURF_P2) |
2939 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942 			    PIPE_CONFIG(ADDR_SURF_P2) |
2943 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946 			    PIPE_CONFIG(ADDR_SURF_P2));
2947 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949 			    PIPE_CONFIG(ADDR_SURF_P2) |
2950 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953 			    PIPE_CONFIG(ADDR_SURF_P2) |
2954 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957 			    PIPE_CONFIG(ADDR_SURF_P2) |
2958 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 
2960 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963 				NUM_BANKS(ADDR_SURF_16_BANK));
2964 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2965 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967 				NUM_BANKS(ADDR_SURF_16_BANK));
2968 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 				NUM_BANKS(ADDR_SURF_16_BANK));
2972 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975 				NUM_BANKS(ADDR_SURF_16_BANK));
2976 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979 				NUM_BANKS(ADDR_SURF_16_BANK));
2980 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983 				NUM_BANKS(ADDR_SURF_16_BANK));
2984 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987 				NUM_BANKS(ADDR_SURF_8_BANK));
2988 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2990 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 				NUM_BANKS(ADDR_SURF_16_BANK));
2992 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2993 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 				NUM_BANKS(ADDR_SURF_16_BANK));
2996 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999 				NUM_BANKS(ADDR_SURF_16_BANK));
3000 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 				NUM_BANKS(ADDR_SURF_16_BANK));
3004 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3006 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007 				NUM_BANKS(ADDR_SURF_16_BANK));
3008 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011 				NUM_BANKS(ADDR_SURF_16_BANK));
3012 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3015 				NUM_BANKS(ADDR_SURF_8_BANK));
3016 
3017 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3018 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3019 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3020 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3021 		break;
3022 
3023 	default:
3024 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3025 	}
3026 }
3027 
3028 /**
3029  * cik_select_se_sh - select which SE, SH to address
3030  *
3031  * @rdev: radeon_device pointer
3032  * @se_num: shader engine to address
3033  * @sh_num: sh block to address
3034  *
3035  * Select which SE, SH combinations to address. Certain
3036  * registers are instanced per SE or SH.  0xffffffff means
3037  * broadcast to all SEs or SHs (CIK).
3038  */
3039 static void cik_select_se_sh(struct radeon_device *rdev,
3040 			     u32 se_num, u32 sh_num)
3041 {
3042 	u32 data = INSTANCE_BROADCAST_WRITES;
3043 
3044 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3045 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3046 	else if (se_num == 0xffffffff)
3047 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3048 	else if (sh_num == 0xffffffff)
3049 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3050 	else
3051 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3052 	WREG32(GRBM_GFX_INDEX, data);
3053 }
3054 
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: number of low-order bits to set
 *
 * Build a mask with the lowest @bit_width bits set (CIK).  A width of
 * 32 or more yields a mask with all 32 bits set.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* shifting by the full width of the type is undefined, so saturate */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3073 
3074 /**
3075  * cik_get_rb_disabled - computes the mask of disabled RBs
3076  *
3077  * @rdev: radeon_device pointer
3078  * @max_rb_num: max RBs (render backends) for the asic
3079  * @se_num: number of SEs (shader engines) for the asic
3080  * @sh_per_se: number of SH blocks per SE for the asic
3081  *
3082  * Calculates the bitmask of disabled RBs (CIK).
3083  * Returns the disabled RB bitmask.
3084  */
3085 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3086 			      u32 max_rb_num_per_se,
3087 			      u32 sh_per_se)
3088 {
3089 	u32 data, mask;
3090 
3091 	data = RREG32(CC_RB_BACKEND_DISABLE);
3092 	if (data & 1)
3093 		data &= BACKEND_DISABLE_MASK;
3094 	else
3095 		data = 0;
3096 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3097 
3098 	data >>= BACKEND_DISABLE_SHIFT;
3099 
3100 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3101 
3102 	return data & mask;
3103 }
3104 
3105 /**
3106  * cik_setup_rb - setup the RBs on the asic
3107  *
3108  * @rdev: radeon_device pointer
3109  * @se_num: number of SEs (shader engines) for the asic
3110  * @sh_per_se: number of SH blocks per SE for the asic
3111  * @max_rb_num: max RBs (render backends) for the asic
3112  *
3113  * Configures per-SE/SH RB registers (CIK).
3114  */
3115 static void cik_setup_rb(struct radeon_device *rdev,
3116 			 u32 se_num, u32 sh_per_se,
3117 			 u32 max_rb_num_per_se)
3118 {
3119 	int i, j;
3120 	u32 data, mask;
3121 	u32 disabled_rbs = 0;
3122 	u32 enabled_rbs = 0;
3123 
3124 	mutex_lock(&rdev->grbm_idx_mutex);
3125 	for (i = 0; i < se_num; i++) {
3126 		for (j = 0; j < sh_per_se; j++) {
3127 			cik_select_se_sh(rdev, i, j);
3128 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3129 			if (rdev->family == CHIP_HAWAII)
3130 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3131 			else
3132 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3133 		}
3134 	}
3135 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3136 	mutex_unlock(&rdev->grbm_idx_mutex);
3137 
3138 	mask = 1;
3139 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3140 		if (!(disabled_rbs & mask))
3141 			enabled_rbs |= mask;
3142 		mask <<= 1;
3143 	}
3144 
3145 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3146 
3147 	mutex_lock(&rdev->grbm_idx_mutex);
3148 	for (i = 0; i < se_num; i++) {
3149 		cik_select_se_sh(rdev, i, 0xffffffff);
3150 		data = 0;
3151 		for (j = 0; j < sh_per_se; j++) {
3152 			switch (enabled_rbs & 3) {
3153 			case 0:
3154 				if (j == 0)
3155 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3156 				else
3157 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3158 				break;
3159 			case 1:
3160 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3161 				break;
3162 			case 2:
3163 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3164 				break;
3165 			case 3:
3166 			default:
3167 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3168 				break;
3169 			}
3170 			enabled_rbs >>= 2;
3171 		}
3172 		WREG32(PA_SC_RASTER_CONFIG, data);
3173 	}
3174 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3175 	mutex_unlock(&rdev->grbm_idx_mutex);
3176 }
3177 
3178 /**
3179  * cik_gpu_init - setup the 3D engine
3180  *
3181  * @rdev: radeon_device pointer
3182  *
3183  * Configures the 3D engine and tiling configuration
3184  * registers so that the 3D engine is usable.
3185  */
3186 static void cik_gpu_init(struct radeon_device *rdev)
3187 {
3188 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3189 	u32 mc_shared_chmap, mc_arb_ramcfg;
3190 	u32 hdp_host_path_cntl;
3191 	u32 tmp;
3192 	int i, j;
3193 
3194 	switch (rdev->family) {
3195 	case CHIP_BONAIRE:
3196 		rdev->config.cik.max_shader_engines = 2;
3197 		rdev->config.cik.max_tile_pipes = 4;
3198 		rdev->config.cik.max_cu_per_sh = 7;
3199 		rdev->config.cik.max_sh_per_se = 1;
3200 		rdev->config.cik.max_backends_per_se = 2;
3201 		rdev->config.cik.max_texture_channel_caches = 4;
3202 		rdev->config.cik.max_gprs = 256;
3203 		rdev->config.cik.max_gs_threads = 32;
3204 		rdev->config.cik.max_hw_contexts = 8;
3205 
3206 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3207 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3208 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3209 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3210 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3211 		break;
3212 	case CHIP_HAWAII:
3213 		rdev->config.cik.max_shader_engines = 4;
3214 		rdev->config.cik.max_tile_pipes = 16;
3215 		rdev->config.cik.max_cu_per_sh = 11;
3216 		rdev->config.cik.max_sh_per_se = 1;
3217 		rdev->config.cik.max_backends_per_se = 4;
3218 		rdev->config.cik.max_texture_channel_caches = 16;
3219 		rdev->config.cik.max_gprs = 256;
3220 		rdev->config.cik.max_gs_threads = 32;
3221 		rdev->config.cik.max_hw_contexts = 8;
3222 
3223 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3224 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3225 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3226 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3227 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3228 		break;
3229 	case CHIP_KAVERI:
3230 		rdev->config.cik.max_shader_engines = 1;
3231 		rdev->config.cik.max_tile_pipes = 4;
3232 		if ((rdev->pdev->device == 0x1304) ||
3233 		    (rdev->pdev->device == 0x1305) ||
3234 		    (rdev->pdev->device == 0x130C) ||
3235 		    (rdev->pdev->device == 0x130F) ||
3236 		    (rdev->pdev->device == 0x1310) ||
3237 		    (rdev->pdev->device == 0x1311) ||
3238 		    (rdev->pdev->device == 0x131C)) {
3239 			rdev->config.cik.max_cu_per_sh = 8;
3240 			rdev->config.cik.max_backends_per_se = 2;
3241 		} else if ((rdev->pdev->device == 0x1309) ||
3242 			   (rdev->pdev->device == 0x130A) ||
3243 			   (rdev->pdev->device == 0x130D) ||
3244 			   (rdev->pdev->device == 0x1313) ||
3245 			   (rdev->pdev->device == 0x131D)) {
3246 			rdev->config.cik.max_cu_per_sh = 6;
3247 			rdev->config.cik.max_backends_per_se = 2;
3248 		} else if ((rdev->pdev->device == 0x1306) ||
3249 			   (rdev->pdev->device == 0x1307) ||
3250 			   (rdev->pdev->device == 0x130B) ||
3251 			   (rdev->pdev->device == 0x130E) ||
3252 			   (rdev->pdev->device == 0x1315) ||
3253 			   (rdev->pdev->device == 0x1318) ||
3254 			   (rdev->pdev->device == 0x131B)) {
3255 			rdev->config.cik.max_cu_per_sh = 4;
3256 			rdev->config.cik.max_backends_per_se = 1;
3257 		} else {
3258 			rdev->config.cik.max_cu_per_sh = 3;
3259 			rdev->config.cik.max_backends_per_se = 1;
3260 		}
3261 		rdev->config.cik.max_sh_per_se = 1;
3262 		rdev->config.cik.max_texture_channel_caches = 4;
3263 		rdev->config.cik.max_gprs = 256;
3264 		rdev->config.cik.max_gs_threads = 16;
3265 		rdev->config.cik.max_hw_contexts = 8;
3266 
3267 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3268 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3269 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3270 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3271 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3272 		break;
3273 	case CHIP_KABINI:
3274 	case CHIP_MULLINS:
3275 	default:
3276 		rdev->config.cik.max_shader_engines = 1;
3277 		rdev->config.cik.max_tile_pipes = 2;
3278 		rdev->config.cik.max_cu_per_sh = 2;
3279 		rdev->config.cik.max_sh_per_se = 1;
3280 		rdev->config.cik.max_backends_per_se = 1;
3281 		rdev->config.cik.max_texture_channel_caches = 2;
3282 		rdev->config.cik.max_gprs = 256;
3283 		rdev->config.cik.max_gs_threads = 16;
3284 		rdev->config.cik.max_hw_contexts = 8;
3285 
3286 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3287 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3288 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3289 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3290 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3291 		break;
3292 	}
3293 
3294 	/* Initialize HDP */
3295 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3296 		WREG32((0x2c14 + j), 0x00000000);
3297 		WREG32((0x2c18 + j), 0x00000000);
3298 		WREG32((0x2c1c + j), 0x00000000);
3299 		WREG32((0x2c20 + j), 0x00000000);
3300 		WREG32((0x2c24 + j), 0x00000000);
3301 	}
3302 
3303 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3304 	WREG32(SRBM_INT_CNTL, 0x1);
3305 	WREG32(SRBM_INT_ACK, 0x1);
3306 
3307 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3308 
3309 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3310 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3311 
3312 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3313 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3314 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3315 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3316 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3317 		rdev->config.cik.mem_row_size_in_kb = 4;
3318 	/* XXX use MC settings? */
3319 	rdev->config.cik.shader_engine_tile_size = 32;
3320 	rdev->config.cik.num_gpus = 1;
3321 	rdev->config.cik.multi_gpu_tile_size = 64;
3322 
3323 	/* fix up row size */
3324 	gb_addr_config &= ~ROW_SIZE_MASK;
3325 	switch (rdev->config.cik.mem_row_size_in_kb) {
3326 	case 1:
3327 	default:
3328 		gb_addr_config |= ROW_SIZE(0);
3329 		break;
3330 	case 2:
3331 		gb_addr_config |= ROW_SIZE(1);
3332 		break;
3333 	case 4:
3334 		gb_addr_config |= ROW_SIZE(2);
3335 		break;
3336 	}
3337 
3338 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3339 	 * not have bank info, so create a custom tiling dword.
3340 	 * bits 3:0   num_pipes
3341 	 * bits 7:4   num_banks
3342 	 * bits 11:8  group_size
3343 	 * bits 15:12 row_size
3344 	 */
3345 	rdev->config.cik.tile_config = 0;
3346 	switch (rdev->config.cik.num_tile_pipes) {
3347 	case 1:
3348 		rdev->config.cik.tile_config |= (0 << 0);
3349 		break;
3350 	case 2:
3351 		rdev->config.cik.tile_config |= (1 << 0);
3352 		break;
3353 	case 4:
3354 		rdev->config.cik.tile_config |= (2 << 0);
3355 		break;
3356 	case 8:
3357 	default:
3358 		/* XXX what about 12? */
3359 		rdev->config.cik.tile_config |= (3 << 0);
3360 		break;
3361 	}
3362 	rdev->config.cik.tile_config |=
3363 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3364 	rdev->config.cik.tile_config |=
3365 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3366 	rdev->config.cik.tile_config |=
3367 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3368 
3369 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3370 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3371 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3372 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3373 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3374 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3375 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3376 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3377 
3378 	cik_tiling_mode_table_init(rdev);
3379 
3380 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3381 		     rdev->config.cik.max_sh_per_se,
3382 		     rdev->config.cik.max_backends_per_se);
3383 
3384 	rdev->config.cik.active_cus = 0;
3385 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3386 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3387 			rdev->config.cik.active_cus +=
3388 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3389 		}
3390 	}
3391 
3392 	/* set HW defaults for 3D engine */
3393 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3394 
3395 	mutex_lock(&rdev->grbm_idx_mutex);
3396 	/*
3397 	 * making sure that the following register writes will be broadcasted
3398 	 * to all the shaders
3399 	 */
3400 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3401 	WREG32(SX_DEBUG_1, 0x20);
3402 
3403 	WREG32(TA_CNTL_AUX, 0x00010000);
3404 
3405 	tmp = RREG32(SPI_CONFIG_CNTL);
3406 	tmp |= 0x03000000;
3407 	WREG32(SPI_CONFIG_CNTL, tmp);
3408 
3409 	WREG32(SQ_CONFIG, 1);
3410 
3411 	WREG32(DB_DEBUG, 0);
3412 
3413 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3414 	tmp |= 0x00000400;
3415 	WREG32(DB_DEBUG2, tmp);
3416 
3417 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3418 	tmp |= 0x00020200;
3419 	WREG32(DB_DEBUG3, tmp);
3420 
3421 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3422 	tmp |= 0x00018208;
3423 	WREG32(CB_HW_CONTROL, tmp);
3424 
3425 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3426 
3427 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3428 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3429 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3430 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3431 
3432 	WREG32(VGT_NUM_INSTANCES, 1);
3433 
3434 	WREG32(CP_PERFMON_CNTL, 0);
3435 
3436 	WREG32(SQ_CONFIG, 0);
3437 
3438 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3439 					  FORCE_EOV_MAX_REZ_CNT(255)));
3440 
3441 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3442 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3443 
3444 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3445 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3446 
3447 	tmp = RREG32(HDP_MISC_CNTL);
3448 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3449 	WREG32(HDP_MISC_CNTL, tmp);
3450 
3451 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3452 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3453 
3454 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3455 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3456 	mutex_unlock(&rdev->grbm_idx_mutex);
3457 
3458 	udelay(50);
3459 }
3460 
3461 /*
3462  * GPU scratch registers helpers function.
3463  */
3464 /**
3465  * cik_scratch_init - setup driver info for CP scratch regs
3466  *
3467  * @rdev: radeon_device pointer
3468  *
3469  * Set up the number and offset of the CP scratch registers.
3470  * NOTE: use of CP scratch registers is a legacy inferface and
3471  * is not used by default on newer asics (r6xx+).  On newer asics,
3472  * memory buffers are used for fences rather than scratch regs.
3473  */
3474 static void cik_scratch_init(struct radeon_device *rdev)
3475 {
3476 	int i;
3477 
3478 	rdev->scratch.num_reg = 7;
3479 	rdev->scratch.reg_base = SCRATCH_REG0;
3480 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3481 		rdev->scratch.free[i] = true;
3482 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3483 	}
3484 }
3485 
3486 /**
3487  * cik_ring_test - basic gfx ring test
3488  *
3489  * @rdev: radeon_device pointer
3490  * @ring: radeon_ring structure holding ring information
3491  *
3492  * Allocate a scratch register and write to it using the gfx ring (CIK).
3493  * Provides a basic gfx ring test to verify that the ring is working.
3494  * Used by cik_cp_gfx_resume();
3495  * Returns 0 on success, error on failure.
3496  */
3497 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3498 {
3499 	uint32_t scratch;
3500 	uint32_t tmp = 0;
3501 	unsigned i;
3502 	int r;
3503 
3504 	r = radeon_scratch_get(rdev, &scratch);
3505 	if (r) {
3506 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3507 		return r;
3508 	}
3509 	WREG32(scratch, 0xCAFEDEAD);
3510 	r = radeon_ring_lock(rdev, ring, 3);
3511 	if (r) {
3512 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3513 		radeon_scratch_free(rdev, scratch);
3514 		return r;
3515 	}
3516 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3517 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3518 	radeon_ring_write(ring, 0xDEADBEEF);
3519 	radeon_ring_unlock_commit(rdev, ring, false);
3520 
3521 	for (i = 0; i < rdev->usec_timeout; i++) {
3522 		tmp = RREG32(scratch);
3523 		if (tmp == 0xDEADBEEF)
3524 			break;
3525 		DRM_UDELAY(1);
3526 	}
3527 	if (i < rdev->usec_timeout) {
3528 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3529 	} else {
3530 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3531 			  ring->idx, scratch, tmp);
3532 		r = -EINVAL;
3533 	}
3534 	radeon_scratch_free(rdev, scratch);
3535 	return r;
3536 }
3537 
3538 /**
3539  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3540  *
3541  * @rdev: radeon_device pointer
3542  * @ridx: radeon ring index
3543  *
3544  * Emits an hdp flush on the cp.
3545  */
3546 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3547 				       int ridx)
3548 {
3549 	struct radeon_ring *ring = &rdev->ring[ridx];
3550 	u32 ref_and_mask;
3551 
3552 	switch (ring->idx) {
3553 	case CAYMAN_RING_TYPE_CP1_INDEX:
3554 	case CAYMAN_RING_TYPE_CP2_INDEX:
3555 	default:
3556 		switch (ring->me) {
3557 		case 0:
3558 			ref_and_mask = CP2 << ring->pipe;
3559 			break;
3560 		case 1:
3561 			ref_and_mask = CP6 << ring->pipe;
3562 			break;
3563 		default:
3564 			return;
3565 		}
3566 		break;
3567 	case RADEON_RING_TYPE_GFX_INDEX:
3568 		ref_and_mask = CP0;
3569 		break;
3570 	}
3571 
3572 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3573 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3574 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3575 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3576 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3577 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3578 	radeon_ring_write(ring, ref_and_mask);
3579 	radeon_ring_write(ring, ref_and_mask);
3580 	radeon_ring_write(ring, 0x20); /* poll interval */
3581 }
3582 
3583 /**
3584  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3585  *
3586  * @rdev: radeon_device pointer
3587  * @fence: radeon fence object
3588  *
3589  * Emits a fence sequnce number on the gfx ring and flushes
3590  * GPU caches.
3591  */
3592 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3593 			     struct radeon_fence *fence)
3594 {
3595 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3596 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3597 
3598 	/* Workaround for cache flush problems. First send a dummy EOP
3599 	 * event down the pipe with seq one below.
3600 	 */
3601 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3602 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3603 				 EOP_TC_ACTION_EN |
3604 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3605 				 EVENT_INDEX(5)));
3606 	radeon_ring_write(ring, addr & 0xfffffffc);
3607 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3608 				DATA_SEL(1) | INT_SEL(0));
3609 	radeon_ring_write(ring, fence->seq - 1);
3610 	radeon_ring_write(ring, 0);
3611 
3612 	/* Then send the real EOP event down the pipe. */
3613 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3614 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3615 				 EOP_TC_ACTION_EN |
3616 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3617 				 EVENT_INDEX(5)));
3618 	radeon_ring_write(ring, addr & 0xfffffffc);
3619 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3620 	radeon_ring_write(ring, fence->seq);
3621 	radeon_ring_write(ring, 0);
3622 }
3623 
3624 /**
3625  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3626  *
3627  * @rdev: radeon_device pointer
3628  * @fence: radeon fence object
3629  *
3630  * Emits a fence sequnce number on the compute ring and flushes
3631  * GPU caches.
3632  */
3633 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3634 				 struct radeon_fence *fence)
3635 {
3636 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3637 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3638 
3639 	/* RELEASE_MEM - flush caches, send int */
3640 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3641 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3642 				 EOP_TC_ACTION_EN |
3643 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3644 				 EVENT_INDEX(5)));
3645 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3646 	radeon_ring_write(ring, addr & 0xfffffffc);
3647 	radeon_ring_write(ring, upper_32_bits(addr));
3648 	radeon_ring_write(ring, fence->seq);
3649 	radeon_ring_write(ring, 0);
3650 }
3651 
3652 /**
3653  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3654  *
3655  * @rdev: radeon_device pointer
3656  * @ring: radeon ring buffer object
3657  * @semaphore: radeon semaphore object
3658  * @emit_wait: Is this a sempahore wait?
3659  *
3660  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3661  * from running ahead of semaphore waits.
3662  */
3663 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3664 			     struct radeon_ring *ring,
3665 			     struct radeon_semaphore *semaphore,
3666 			     bool emit_wait)
3667 {
3668 	uint64_t addr = semaphore->gpu_addr;
3669 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3670 
3671 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3672 	radeon_ring_write(ring, lower_32_bits(addr));
3673 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3674 
3675 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3676 		/* Prevent the PFP from running ahead of the semaphore wait */
3677 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3678 		radeon_ring_write(ring, 0x0);
3679 	}
3680 
3681 	return true;
3682 }
3683 
3684 /**
3685  * cik_copy_cpdma - copy pages using the CP DMA engine
3686  *
3687  * @rdev: radeon_device pointer
3688  * @src_offset: src GPU address
3689  * @dst_offset: dst GPU address
3690  * @num_gpu_pages: number of GPU pages to xfer
3691  * @resv: reservation object to sync to
3692  *
3693  * Copy GPU paging using the CP DMA engine (CIK+).
3694  * Used by the radeon ttm implementation to move pages if
3695  * registered as the asic copy callback.
3696  */
3697 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3698 				    uint64_t src_offset, uint64_t dst_offset,
3699 				    unsigned num_gpu_pages,
3700 				    struct reservation_object *resv)
3701 {
3702 	struct radeon_fence *fence;
3703 	struct radeon_sync sync;
3704 	int ring_index = rdev->asic->copy.blit_ring_index;
3705 	struct radeon_ring *ring = &rdev->ring[ring_index];
3706 	u32 size_in_bytes, cur_size_in_bytes, control;
3707 	int i, num_loops;
3708 	int r = 0;
3709 
3710 	radeon_sync_create(&sync);
3711 
3712 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3713 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3714 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3715 	if (r) {
3716 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3717 		radeon_sync_free(rdev, &sync, NULL);
3718 		return ERR_PTR(r);
3719 	}
3720 
3721 	radeon_sync_resv(rdev, &sync, resv, false);
3722 	radeon_sync_rings(rdev, &sync, ring->idx);
3723 
3724 	for (i = 0; i < num_loops; i++) {
3725 		cur_size_in_bytes = size_in_bytes;
3726 		if (cur_size_in_bytes > 0x1fffff)
3727 			cur_size_in_bytes = 0x1fffff;
3728 		size_in_bytes -= cur_size_in_bytes;
3729 		control = 0;
3730 		if (size_in_bytes == 0)
3731 			control |= PACKET3_DMA_DATA_CP_SYNC;
3732 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3733 		radeon_ring_write(ring, control);
3734 		radeon_ring_write(ring, lower_32_bits(src_offset));
3735 		radeon_ring_write(ring, upper_32_bits(src_offset));
3736 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3737 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3738 		radeon_ring_write(ring, cur_size_in_bytes);
3739 		src_offset += cur_size_in_bytes;
3740 		dst_offset += cur_size_in_bytes;
3741 	}
3742 
3743 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3744 	if (r) {
3745 		radeon_ring_unlock_undo(rdev, ring);
3746 		radeon_sync_free(rdev, &sync, NULL);
3747 		return ERR_PTR(r);
3748 	}
3749 
3750 	radeon_ring_unlock_commit(rdev, ring, false);
3751 	radeon_sync_free(rdev, &sync, fence);
3752 
3753 	return fence;
3754 }
3755 
3756 /*
3757  * IB stuff
3758  */
3759 /**
3760  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3761  *
3762  * @rdev: radeon_device pointer
3763  * @ib: radeon indirect buffer object
3764  *
3765  * Emits a DE (drawing engine) or CE (constant engine) IB
3766  * on the gfx ring.  IBs are usually generated by userspace
3767  * acceleration drivers and submitted to the kernel for
3768  * scheduling on the ring.  This function schedules the IB
3769  * on the gfx ring for execution by the GPU.
3770  */
3771 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3772 {
3773 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3774 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3775 	u32 header, control = INDIRECT_BUFFER_VALID;
3776 
3777 	if (ib->is_const_ib) {
3778 		/* set switch buffer packet before const IB */
3779 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3780 		radeon_ring_write(ring, 0);
3781 
3782 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3783 	} else {
3784 		u32 next_rptr;
3785 		if (ring->rptr_save_reg) {
3786 			next_rptr = ring->wptr + 3 + 4;
3787 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3788 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3789 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3790 			radeon_ring_write(ring, next_rptr);
3791 		} else if (rdev->wb.enabled) {
3792 			next_rptr = ring->wptr + 5 + 4;
3793 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3794 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3795 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3796 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3797 			radeon_ring_write(ring, next_rptr);
3798 		}
3799 
3800 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3801 	}
3802 
3803 	control |= ib->length_dw | (vm_id << 24);
3804 
3805 	radeon_ring_write(ring, header);
3806 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3807 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3808 	radeon_ring_write(ring, control);
3809 }
3810 
3811 /**
3812  * cik_ib_test - basic gfx ring IB test
3813  *
3814  * @rdev: radeon_device pointer
3815  * @ring: radeon_ring structure holding ring information
3816  *
3817  * Allocate an IB and execute it on the gfx ring (CIK).
3818  * Provides a basic gfx ring test to verify that IBs are working.
3819  * Returns 0 on success, error on failure.
3820  */
3821 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3822 {
3823 	struct radeon_ib ib;
3824 	uint32_t scratch;
3825 	uint32_t tmp = 0;
3826 	unsigned i;
3827 	int r;
3828 
3829 	r = radeon_scratch_get(rdev, &scratch);
3830 	if (r) {
3831 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3832 		return r;
3833 	}
3834 	WREG32(scratch, 0xCAFEDEAD);
3835 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3836 	if (r) {
3837 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3838 		radeon_scratch_free(rdev, scratch);
3839 		return r;
3840 	}
3841 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3842 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3843 	ib.ptr[2] = 0xDEADBEEF;
3844 	ib.length_dw = 3;
3845 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3846 	if (r) {
3847 		radeon_scratch_free(rdev, scratch);
3848 		radeon_ib_free(rdev, &ib);
3849 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3850 		return r;
3851 	}
3852 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3853 		RADEON_USEC_IB_TEST_TIMEOUT));
3854 	if (r < 0) {
3855 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3856 		radeon_scratch_free(rdev, scratch);
3857 		radeon_ib_free(rdev, &ib);
3858 		return r;
3859 	} else if (r == 0) {
3860 		DRM_ERROR("radeon: fence wait timed out.\n");
3861 		radeon_scratch_free(rdev, scratch);
3862 		radeon_ib_free(rdev, &ib);
3863 		return -ETIMEDOUT;
3864 	}
3865 	r = 0;
3866 	for (i = 0; i < rdev->usec_timeout; i++) {
3867 		tmp = RREG32(scratch);
3868 		if (tmp == 0xDEADBEEF)
3869 			break;
3870 		DRM_UDELAY(1);
3871 	}
3872 	if (i < rdev->usec_timeout) {
3873 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3874 	} else {
3875 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3876 			  scratch, tmp);
3877 		r = -EINVAL;
3878 	}
3879 	radeon_scratch_free(rdev, scratch);
3880 	radeon_ib_free(rdev, &ib);
3881 	return r;
3882 }
3883 
3884 /*
3885  * CP.
 * On CIK, gfx and compute now have independent command processors.
3887  *
3888  * GFX
3889  * Gfx consists of a single ring and can process both gfx jobs and
3890  * compute jobs.  The gfx CP consists of three microengines (ME):
3891  * PFP - Pre-Fetch Parser
3892  * ME - Micro Engine
3893  * CE - Constant Engine
3894  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3896  * used by the DE so that they can be loaded into cache in parallel
3897  * while the DE is processing state update packets.
3898  *
3899  * Compute
3900  * The compute CP consists of two microengines (ME):
3901  * MEC1 - Compute MicroEngine 1
3902  * MEC2 - Compute MicroEngine 2
3903  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3904  * The queues are exposed to userspace and are programmed directly
3905  * by the compute runtime.
3906  */
3907 /**
3908  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3909  *
3910  * @rdev: radeon_device pointer
3911  * @enable: enable or disable the MEs
3912  *
3913  * Halts or unhalts the gfx MEs.
3914  */
3915 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3916 {
3917 	if (enable)
3918 		WREG32(CP_ME_CNTL, 0);
3919 	else {
3920 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3921 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3922 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3923 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3924 	}
3925 	udelay(50);
3926 }
3927 
3928 /**
3929  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3930  *
3931  * @rdev: radeon_device pointer
3932  *
3933  * Loads the gfx PFP, ME, and CE ucode.
3934  * Returns 0 for success, -EINVAL if the ucode is not available.
3935  */
3936 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3937 {
3938 	int i;
3939 
3940 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3941 		return -EINVAL;
3942 
3943 	cik_cp_gfx_enable(rdev, false);
3944 
3945 	if (rdev->new_fw) {
3946 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3947 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3948 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3949 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3950 		const struct gfx_firmware_header_v1_0 *me_hdr =
3951 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3952 		const __le32 *fw_data;
3953 		u32 fw_size;
3954 
3955 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3956 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3957 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3958 
3959 		/* PFP */
3960 		fw_data = (const __le32 *)
3961 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3962 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3963 		WREG32(CP_PFP_UCODE_ADDR, 0);
3964 		for (i = 0; i < fw_size; i++)
3965 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3966 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3967 
3968 		/* CE */
3969 		fw_data = (const __le32 *)
3970 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3971 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3972 		WREG32(CP_CE_UCODE_ADDR, 0);
3973 		for (i = 0; i < fw_size; i++)
3974 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3975 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3976 
3977 		/* ME */
3978 		fw_data = (const __be32 *)
3979 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3980 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3981 		WREG32(CP_ME_RAM_WADDR, 0);
3982 		for (i = 0; i < fw_size; i++)
3983 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3984 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3985 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3986 	} else {
3987 		const __be32 *fw_data;
3988 
3989 		/* PFP */
3990 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3991 		WREG32(CP_PFP_UCODE_ADDR, 0);
3992 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3993 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3994 		WREG32(CP_PFP_UCODE_ADDR, 0);
3995 
3996 		/* CE */
3997 		fw_data = (const __be32 *)rdev->ce_fw->data;
3998 		WREG32(CP_CE_UCODE_ADDR, 0);
3999 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4000 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4001 		WREG32(CP_CE_UCODE_ADDR, 0);
4002 
4003 		/* ME */
4004 		fw_data = (const __be32 *)rdev->me_fw->data;
4005 		WREG32(CP_ME_RAM_WADDR, 0);
4006 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4007 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4008 		WREG32(CP_ME_RAM_WADDR, 0);
4009 	}
4010 
4011 	return 0;
4012 }
4013 
4014 /**
4015  * cik_cp_gfx_start - start the gfx ring
4016  *
4017  * @rdev: radeon_device pointer
4018  *
4019  * Enables the ring and loads the clear state context and other
4020  * packets required to init the ring.
4021  * Returns 0 for success, error for failure.
4022  */
4023 static int cik_cp_gfx_start(struct radeon_device *rdev)
4024 {
4025 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4026 	int r, i;
4027 
4028 	/* init the CP */
4029 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4030 	WREG32(CP_ENDIAN_SWAP, 0);
4031 	WREG32(CP_DEVICE_ID, 1);
4032 
4033 	cik_cp_gfx_enable(rdev, true);
4034 
4035 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4036 	if (r) {
4037 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4038 		return r;
4039 	}
4040 
4041 	/* init the CE partitions.  CE only used for gfx on CIK */
4042 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4043 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4044 	radeon_ring_write(ring, 0x8000);
4045 	radeon_ring_write(ring, 0x8000);
4046 
4047 	/* setup clear context state */
4048 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4049 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4050 
4051 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4052 	radeon_ring_write(ring, 0x80000000);
4053 	radeon_ring_write(ring, 0x80000000);
4054 
4055 	for (i = 0; i < cik_default_size; i++)
4056 		radeon_ring_write(ring, cik_default_state[i]);
4057 
4058 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4059 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4060 
4061 	/* set clear context state */
4062 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4063 	radeon_ring_write(ring, 0);
4064 
4065 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4066 	radeon_ring_write(ring, 0x00000316);
4067 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4068 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4069 
4070 	radeon_ring_unlock_commit(rdev, ring, false);
4071 
4072 	return 0;
4073 }
4074 
4075 /**
4076  * cik_cp_gfx_fini - stop the gfx ring
4077  *
4078  * @rdev: radeon_device pointer
4079  *
4080  * Stop the gfx ring and tear down the driver ring
4081  * info.
4082  */
4083 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4084 {
4085 	cik_cp_gfx_enable(rdev, false);
4086 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4087 }
4088 
4089 /**
4090  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4091  *
4092  * @rdev: radeon_device pointer
4093  *
4094  * Program the location and size of the gfx ring buffer
4095  * and test it to make sure it's working.
4096  * Returns 0 for success, error for failure.
4097  */
4098 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4099 {
4100 	struct radeon_ring *ring;
4101 	u32 tmp;
4102 	u32 rb_bufsz;
4103 	u64 rb_addr;
4104 	int r;
4105 
4106 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4107 	if (rdev->family != CHIP_HAWAII)
4108 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4109 
4110 	/* Set the write pointer delay */
4111 	WREG32(CP_RB_WPTR_DELAY, 0);
4112 
4113 	/* set the RB to use vmid 0 */
4114 	WREG32(CP_RB_VMID, 0);
4115 
4116 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4117 
4118 	/* ring 0 - compute and gfx */
4119 	/* Set ring buffer size */
4120 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4121 	rb_bufsz = order_base_2(ring->ring_size / 8);
4122 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4123 #ifdef __BIG_ENDIAN
4124 	tmp |= BUF_SWAP_32BIT;
4125 #endif
4126 	WREG32(CP_RB0_CNTL, tmp);
4127 
4128 	/* Initialize the ring buffer's read and write pointers */
4129 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4130 	ring->wptr = 0;
4131 	WREG32(CP_RB0_WPTR, ring->wptr);
4132 
4133 	/* set the wb address wether it's enabled or not */
4134 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4135 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4136 
4137 	/* scratch register shadowing is no longer supported */
4138 	WREG32(SCRATCH_UMSK, 0);
4139 
4140 	if (!rdev->wb.enabled)
4141 		tmp |= RB_NO_UPDATE;
4142 
4143 	mdelay(1);
4144 	WREG32(CP_RB0_CNTL, tmp);
4145 
4146 	rb_addr = ring->gpu_addr >> 8;
4147 	WREG32(CP_RB0_BASE, rb_addr);
4148 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4149 
4150 	/* start the ring */
4151 	cik_cp_gfx_start(rdev);
4152 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4153 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4154 	if (r) {
4155 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4156 		return r;
4157 	}
4158 
4159 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4160 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4161 
4162 	return 0;
4163 }
4164 
4165 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4166 		     struct radeon_ring *ring)
4167 {
4168 	u32 rptr;
4169 
4170 	if (rdev->wb.enabled)
4171 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4172 	else
4173 		rptr = RREG32(CP_RB0_RPTR);
4174 
4175 	return rptr;
4176 }
4177 
4178 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4179 		     struct radeon_ring *ring)
4180 {
4181 	return RREG32(CP_RB0_WPTR);
4182 }
4183 
4184 void cik_gfx_set_wptr(struct radeon_device *rdev,
4185 		      struct radeon_ring *ring)
4186 {
4187 	WREG32(CP_RB0_WPTR, ring->wptr);
4188 	(void)RREG32(CP_RB0_WPTR);
4189 }
4190 
4191 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4192 			 struct radeon_ring *ring)
4193 {
4194 	u32 rptr;
4195 
4196 	if (rdev->wb.enabled) {
4197 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4198 	} else {
4199 		mutex_lock(&rdev->srbm_mutex);
4200 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4201 		rptr = RREG32(CP_HQD_PQ_RPTR);
4202 		cik_srbm_select(rdev, 0, 0, 0, 0);
4203 		mutex_unlock(&rdev->srbm_mutex);
4204 	}
4205 
4206 	return rptr;
4207 }
4208 
4209 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4210 			 struct radeon_ring *ring)
4211 {
4212 	u32 wptr;
4213 
4214 	if (rdev->wb.enabled) {
4215 		/* XXX check if swapping is necessary on BE */
4216 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4217 	} else {
4218 		mutex_lock(&rdev->srbm_mutex);
4219 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4220 		wptr = RREG32(CP_HQD_PQ_WPTR);
4221 		cik_srbm_select(rdev, 0, 0, 0, 0);
4222 		mutex_unlock(&rdev->srbm_mutex);
4223 	}
4224 
4225 	return wptr;
4226 }
4227 
4228 void cik_compute_set_wptr(struct radeon_device *rdev,
4229 			  struct radeon_ring *ring)
4230 {
4231 	/* XXX check if swapping is necessary on BE */
4232 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4233 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4234 }
4235 
4236 static void cik_compute_stop(struct radeon_device *rdev,
4237 			     struct radeon_ring *ring)
4238 {
4239 	u32 j, tmp;
4240 
4241 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4242 	/* Disable wptr polling. */
4243 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4244 	tmp &= ~WPTR_POLL_EN;
4245 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4246 	/* Disable HQD. */
4247 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4248 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4249 		for (j = 0; j < rdev->usec_timeout; j++) {
4250 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4251 				break;
4252 			udelay(1);
4253 		}
4254 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4255 		WREG32(CP_HQD_PQ_RPTR, 0);
4256 		WREG32(CP_HQD_PQ_WPTR, 0);
4257 	}
4258 	cik_srbm_select(rdev, 0, 0, 0, 0);
4259 }
4260 
4261 /**
4262  * cik_cp_compute_enable - enable/disable the compute CP MEs
4263  *
4264  * @rdev: radeon_device pointer
4265  * @enable: enable or disable the MEs
4266  *
4267  * Halts or unhalts the compute MEs.
4268  */
4269 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4270 {
4271 	if (enable)
4272 		WREG32(CP_MEC_CNTL, 0);
4273 	else {
4274 		/*
4275 		 * To make hibernation reliable we need to clear compute ring
4276 		 * configuration before halting the compute ring.
4277 		 */
4278 		mutex_lock(&rdev->srbm_mutex);
4279 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4280 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4281 		mutex_unlock(&rdev->srbm_mutex);
4282 
4283 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4284 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4285 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4286 	}
4287 	udelay(50);
4288 }
4289 
4290 /**
4291  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4292  *
4293  * @rdev: radeon_device pointer
4294  *
4295  * Loads the compute MEC1&2 ucode.
4296  * Returns 0 for success, -EINVAL if the ucode is not available.
4297  */
4298 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4299 {
4300 	int i;
4301 
4302 	if (!rdev->mec_fw)
4303 		return -EINVAL;
4304 
4305 	cik_cp_compute_enable(rdev, false);
4306 
4307 	if (rdev->new_fw) {
4308 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4309 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4310 		const __le32 *fw_data;
4311 		u32 fw_size;
4312 
4313 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4314 
4315 		/* MEC1 */
4316 		fw_data = (const __le32 *)
4317 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4318 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4319 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4320 		for (i = 0; i < fw_size; i++)
4321 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4322 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4323 
4324 		/* MEC2 */
4325 		if (rdev->family == CHIP_KAVERI) {
4326 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4327 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4328 
4329 			fw_data = (const __le32 *)
4330 				(rdev->mec2_fw->data +
4331 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4332 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4333 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4334 			for (i = 0; i < fw_size; i++)
4335 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4336 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4337 		}
4338 	} else {
4339 		const __be32 *fw_data;
4340 
4341 		/* MEC1 */
4342 		fw_data = (const __be32 *)rdev->mec_fw->data;
4343 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4344 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4345 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4346 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4347 
4348 		if (rdev->family == CHIP_KAVERI) {
4349 			/* MEC2 */
4350 			fw_data = (const __be32 *)rdev->mec_fw->data;
4351 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4352 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4353 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4354 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4355 		}
4356 	}
4357 
4358 	return 0;
4359 }
4360 
4361 /**
4362  * cik_cp_compute_start - start the compute queues
4363  *
4364  * @rdev: radeon_device pointer
4365  *
4366  * Enable the compute queues.
4367  * Returns 0 for success, error for failure.
4368  */
4369 static int cik_cp_compute_start(struct radeon_device *rdev)
4370 {
4371 	cik_cp_compute_enable(rdev, true);
4372 
4373 	return 0;
4374 }
4375 
4376 /**
4377  * cik_cp_compute_fini - stop the compute queues
4378  *
4379  * @rdev: radeon_device pointer
4380  *
4381  * Stop the compute queues and tear down the driver queue
4382  * info.
4383  */
4384 static void cik_cp_compute_fini(struct radeon_device *rdev)
4385 {
4386 	int i, idx, r;
4387 
4388 	cik_cp_compute_enable(rdev, false);
4389 
4390 	for (i = 0; i < 2; i++) {
4391 		if (i == 0)
4392 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4393 		else
4394 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4395 
4396 		if (rdev->ring[idx].mqd_obj) {
4397 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4398 			if (unlikely(r != 0))
4399 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4400 
4401 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4402 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4403 
4404 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4405 			rdev->ring[idx].mqd_obj = NULL;
4406 		}
4407 	}
4408 }
4409 
4410 static void cik_mec_fini(struct radeon_device *rdev)
4411 {
4412 	int r;
4413 
4414 	if (rdev->mec.hpd_eop_obj) {
4415 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4416 		if (unlikely(r != 0))
4417 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4418 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4419 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4420 
4421 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4422 		rdev->mec.hpd_eop_obj = NULL;
4423 	}
4424 }
4425 
4426 #define MEC_HPD_SIZE 2048
4427 
4428 static int cik_mec_init(struct radeon_device *rdev)
4429 {
4430 	int r;
4431 	u32 *hpd;
4432 
4433 	/*
4434 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4435 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4436 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4437 	 * be handled by KFD
4438 	 */
4439 	rdev->mec.num_mec = 1;
4440 	rdev->mec.num_pipe = 1;
4441 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4442 
4443 	if (rdev->mec.hpd_eop_obj == NULL) {
4444 		r = radeon_bo_create(rdev,
4445 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4446 				     PAGE_SIZE, true,
4447 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4448 				     &rdev->mec.hpd_eop_obj);
4449 		if (r) {
4450 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4451 			return r;
4452 		}
4453 	}
4454 
4455 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4456 	if (unlikely(r != 0)) {
4457 		cik_mec_fini(rdev);
4458 		return r;
4459 	}
4460 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4461 			  &rdev->mec.hpd_eop_gpu_addr);
4462 	if (r) {
4463 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4464 		cik_mec_fini(rdev);
4465 		return r;
4466 	}
4467 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4468 	if (r) {
4469 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4470 		cik_mec_fini(rdev);
4471 		return r;
4472 	}
4473 
4474 	/* clear memory.  Not sure if this is required or not */
4475 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4476 
4477 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4478 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4479 
4480 	return 0;
4481 }
4482 
/*
 * Shadow copy of the per-queue CP_MQD_* / CP_HQD_* register values.
 * Embedded in struct bonaire_mqd as queue_state; cik_cp_compute_resume()
 * fills in the fields it programs right next to the matching register
 * writes.  The field order is part of the layout and must not be changed.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4521 
/*
 * Memory queue descriptor (MQD) for one compute queue.
 * cik_cp_compute_resume() allocates one buffer of this size per compute
 * ring, zeroes it, and then sets header, the static_thread_mgmt masks and
 * the queue_state shadow registers; the remaining fields stay zero there.
 * The field order is part of the layout and must not be changed.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* shadow of the CP_MQD_* / CP_HQD_* registers for this queue */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4549 
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Start the compute CP, program the HPD EOP buffer for every pipe owned by
 * this driver, then build an MQD and program the HQD registers for the two
 * compute rings (CP1 and CP2) and run a ring test on each.
 * A ring whose test fails is only marked not ready; the function still
 * returns 0 in that case.
 * Returns 0 for success, error for failure (compute start, or MQD buffer
 * create/reserve/pin/map failure).
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	/* always true in this function, so the non-doorbell branches are unused */
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	/*
	 * NOTE(review): unlike the queue loop below, the SRBM selection is not
	 * explicitly reset to 0/0/0/0 before the unlock.  With a single pipe
	 * the last selection is already pipe 0 - confirm if num_pipe grows.
	 */
	for (i = 0; i < rdev->mec.num_pipe; ++i) {
		cik_srbm_select(rdev, 0, i, 0, 0);

		/* each pipe gets its own MEC_HPD_SIZE * 2 slice of the EOP buffer */
		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= order_base_2(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);

	}
	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* the MQD buffer is created once and reused on later resumes */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* route the HQD register accesses below to this ring's queue */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* poll up to usec_timeout microseconds for the queue to go idle */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			/* the result of the wait is not checked; registers are reset anyway */
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		/* queue size field in the low bits, rptr block size field at bit 8 */
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		/* restore the default SRBM selection before dropping the lock */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* mark ready for the test; a failing test only clears the flag */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4791 
/*
 * Forward the same enable/disable request to the gfx CP first and then
 * to the compute CP.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4797 
/*
 * Load the gfx CP microcode and, only if that succeeded, the compute CP
 * microcode.  Returns 0 for success or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (ret == 0)
		ret = cik_cp_compute_load_microcode(rdev);

	return ret;
}
4811 
/* Tear down the gfx CP first and then the compute CP. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4817 
4818 static int cik_cp_resume(struct radeon_device *rdev)
4819 {
4820 	int r;
4821 
4822 	cik_enable_gui_idle_interrupt(rdev, false);
4823 
4824 	r = cik_cp_load_microcode(rdev);
4825 	if (r)
4826 		return r;
4827 
4828 	r = cik_cp_gfx_resume(rdev);
4829 	if (r)
4830 		return r;
4831 	r = cik_cp_compute_resume(rdev);
4832 	if (r)
4833 		return r;
4834 
4835 	cik_enable_gui_idle_interrupt(rdev, true);
4836 
4837 	return 0;
4838 }
4839 
4840 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4841 {
4842 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4843 		RREG32(GRBM_STATUS));
4844 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4845 		RREG32(GRBM_STATUS2));
4846 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4847 		RREG32(GRBM_STATUS_SE0));
4848 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4849 		RREG32(GRBM_STATUS_SE1));
4850 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4851 		RREG32(GRBM_STATUS_SE2));
4852 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4853 		RREG32(GRBM_STATUS_SE3));
4854 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4855 		RREG32(SRBM_STATUS));
4856 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4857 		RREG32(SRBM_STATUS2));
4858 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4859 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4860 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4861 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4862 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4863 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4864 		 RREG32(CP_STALLED_STAT1));
4865 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4866 		 RREG32(CP_STALLED_STAT2));
4867 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4868 		 RREG32(CP_STALLED_STAT3));
4869 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4870 		 RREG32(CP_CPF_BUSY_STAT));
4871 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4872 		 RREG32(CP_CPF_STALLED_STAT1));
4873 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4874 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4875 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4876 		 RREG32(CP_CPC_STALLED_STAT1));
4877 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4878 }
4879 
4880 /**
4881  * cik_gpu_check_soft_reset - check which blocks are busy
4882  *
4883  * @rdev: radeon_device pointer
4884  *
4885  * Check which blocks are busy and return the relevant reset
4886  * mask to be used by cik_gpu_soft_reset().
4887  * Returns a mask of the blocks to be reset.
4888  */
4889 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4890 {
4891 	u32 reset_mask = 0;
4892 	u32 tmp;
4893 
4894 	/* GRBM_STATUS */
4895 	tmp = RREG32(GRBM_STATUS);
4896 	if (tmp & (PA_BUSY | SC_BUSY |
4897 		   BCI_BUSY | SX_BUSY |
4898 		   TA_BUSY | VGT_BUSY |
4899 		   DB_BUSY | CB_BUSY |
4900 		   GDS_BUSY | SPI_BUSY |
4901 		   IA_BUSY | IA_BUSY_NO_DMA))
4902 		reset_mask |= RADEON_RESET_GFX;
4903 
4904 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4905 		reset_mask |= RADEON_RESET_CP;
4906 
4907 	/* GRBM_STATUS2 */
4908 	tmp = RREG32(GRBM_STATUS2);
4909 	if (tmp & RLC_BUSY)
4910 		reset_mask |= RADEON_RESET_RLC;
4911 
4912 	/* SDMA0_STATUS_REG */
4913 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4914 	if (!(tmp & SDMA_IDLE))
4915 		reset_mask |= RADEON_RESET_DMA;
4916 
4917 	/* SDMA1_STATUS_REG */
4918 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4919 	if (!(tmp & SDMA_IDLE))
4920 		reset_mask |= RADEON_RESET_DMA1;
4921 
4922 	/* SRBM_STATUS2 */
4923 	tmp = RREG32(SRBM_STATUS2);
4924 	if (tmp & SDMA_BUSY)
4925 		reset_mask |= RADEON_RESET_DMA;
4926 
4927 	if (tmp & SDMA1_BUSY)
4928 		reset_mask |= RADEON_RESET_DMA1;
4929 
4930 	/* SRBM_STATUS */
4931 	tmp = RREG32(SRBM_STATUS);
4932 
4933 	if (tmp & IH_BUSY)
4934 		reset_mask |= RADEON_RESET_IH;
4935 
4936 	if (tmp & SEM_BUSY)
4937 		reset_mask |= RADEON_RESET_SEM;
4938 
4939 	if (tmp & GRBM_RQ_PENDING)
4940 		reset_mask |= RADEON_RESET_GRBM;
4941 
4942 	if (tmp & VMC_BUSY)
4943 		reset_mask |= RADEON_RESET_VMC;
4944 
4945 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4946 		   MCC_BUSY | MCD_BUSY))
4947 		reset_mask |= RADEON_RESET_MC;
4948 
4949 	if (evergreen_is_display_hung(rdev))
4950 		reset_mask |= RADEON_RESET_DISPLAY;
4951 
4952 	/* Skip MC reset as it's mostly likely not hung, just busy */
4953 	if (reset_mask & RADEON_RESET_MC) {
4954 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4955 		reset_mask &= ~RADEON_RESET_MC;
4956 	}
4957 
4958 	return reset_mask;
4959 }
4960 
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 * Does nothing when @reset_mask is 0.  Otherwise the status registers are
 * logged, CG/PG, the RLC, the CP engines and the requested SDMA engines are
 * stopped, the MC is stopped, and the requested bits are pulsed in
 * GRBM_SOFT_RESET and SRBM_SOFT_RESET before the MC is resumed and the
 * status registers are logged again.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* log the state before the reset */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt only the SDMA engines that are going to be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* an MC idle timeout is only warned about; the reset continues */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate RADEON_RESET_* flags into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		/* SOFT_RESET_CP is already set by the test above */
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* the MC reset bit is never requested on IGP parts */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/*
	 * Pulse the GRBM reset bits: set, wait 50us, clear.  Each write is
	 * followed by a read of the same register (presumably to make sure
	 * the write has landed - confirm).
	 */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same set / wait / clear pulse for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* log the state after the reset */
	cik_print_gpu_status_regs(rdev);
}
5091 
/*
 * GMCON register values captured by kv_save_regs_for_reset() and written
 * back by kv_restore_regs_for_reset() around a pci config reset on IGPs.
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5097 
5098 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5099 				   struct kv_reset_save_regs *save)
5100 {
5101 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5102 	save->gmcon_misc = RREG32(GMCON_MISC);
5103 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5104 
5105 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5106 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5107 						STCTRL_STUTTER_EN));
5108 }
5109 
5110 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5111 				      struct kv_reset_save_regs *save)
5112 {
5113 	int i;
5114 
5115 	WREG32(GMCON_PGFSM_WRITE, 0);
5116 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5117 
5118 	for (i = 0; i < 5; i++)
5119 		WREG32(GMCON_PGFSM_WRITE, 0);
5120 
5121 	WREG32(GMCON_PGFSM_WRITE, 0);
5122 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5123 
5124 	for (i = 0; i < 5; i++)
5125 		WREG32(GMCON_PGFSM_WRITE, 0);
5126 
5127 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5128 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5129 
5130 	for (i = 0; i < 5; i++)
5131 		WREG32(GMCON_PGFSM_WRITE, 0);
5132 
5133 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5134 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5135 
5136 	for (i = 0; i < 5; i++)
5137 		WREG32(GMCON_PGFSM_WRITE, 0);
5138 
5139 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5140 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5141 
5142 	for (i = 0; i < 5; i++)
5143 		WREG32(GMCON_PGFSM_WRITE, 0);
5144 
5145 	WREG32(GMCON_PGFSM_WRITE, 0);
5146 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5147 
5148 	for (i = 0; i < 5; i++)
5149 		WREG32(GMCON_PGFSM_WRITE, 0);
5150 
5151 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5152 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5153 
5154 	for (i = 0; i < 5; i++)
5155 		WREG32(GMCON_PGFSM_WRITE, 0);
5156 
5157 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5158 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5159 
5160 	for (i = 0; i < 5; i++)
5161 		WREG32(GMCON_PGFSM_WRITE, 0);
5162 
5163 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5164 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5165 
5166 	for (i = 0; i < 5; i++)
5167 		WREG32(GMCON_PGFSM_WRITE, 0);
5168 
5169 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5170 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5171 
5172 	for (i = 0; i < 5; i++)
5173 		WREG32(GMCON_PGFSM_WRITE, 0);
5174 
5175 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5176 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5177 
5178 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5179 	WREG32(GMCON_MISC, save->gmcon_misc);
5180 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5181 }
5182 
/*
 * Reset the whole ASIC through radeon_pci_config_reset().
 *
 * Before the reset: CG/PG are disabled, the gfx and compute CP engines and
 * both SDMA engines are halted, the RLC is stopped, the MC is stopped and
 * bus mastering is turned off.  On IGPs a few GMCON registers are saved
 * before and restored after the reset.  After the reset the function polls
 * CONFIG_MEMSIZE for up to rdev->usec_timeout microseconds; a timeout is
 * not reported to the caller.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	/* an MC idle timeout is only warned about; the reset continues */
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGP only: save the GMCON registers that are restored after the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	/* CONFIG_MEMSIZE reading back as something other than all ones ends the wait */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5245 
5246 /**
5247  * cik_asic_reset - soft reset GPU
5248  *
5249  * @rdev: radeon_device pointer
5250  * @hard: force hard reset
5251  *
5252  * Look up which blocks are hung and attempt
5253  * to reset them.
5254  * Returns 0 for success.
5255  */
5256 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5257 {
5258 	u32 reset_mask;
5259 
5260 	if (hard) {
5261 		cik_gpu_pci_config_reset(rdev);
5262 		return 0;
5263 	}
5264 
5265 	reset_mask = cik_gpu_check_soft_reset(rdev);
5266 
5267 	if (reset_mask)
5268 		r600_set_bios_scratch_engine_hung(rdev, true);
5269 
5270 	/* try soft reset */
5271 	cik_gpu_soft_reset(rdev, reset_mask);
5272 
5273 	reset_mask = cik_gpu_check_soft_reset(rdev);
5274 
5275 	/* try pci config reset */
5276 	if (reset_mask && radeon_hard_reset)
5277 		cik_gpu_pci_config_reset(rdev);
5278 
5279 	reset_mask = cik_gpu_check_soft_reset(rdev);
5280 
5281 	if (!reset_mask)
5282 		r600_set_bios_scratch_engine_hung(rdev, false);
5283 
5284 	return 0;
5285 }
5286 
5287 /**
5288  * cik_gfx_is_lockup - check if the 3D engine is locked up
5289  *
5290  * @rdev: radeon_device pointer
5291  * @ring: radeon_ring structure holding ring information
5292  *
5293  * Check if the 3D engine is locked up (CIK).
5294  * Returns true if the engine is locked, false if not.
5295  */
5296 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5297 {
5298 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5299 
5300 	if (!(reset_mask & (RADEON_RESET_GFX |
5301 			    RADEON_RESET_COMPUTE |
5302 			    RADEON_RESET_CP))) {
5303 		radeon_ring_lockup_update(rdev, ring);
5304 		return false;
5305 	}
5306 	return radeon_ring_test_lockup(rdev, ring);
5307 }
5308 
5309 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 * The MC is stopped while the aperture registers are rewritten and resumed
 * afterwards; idle-wait timeouts are only warned about.
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* zero five registers in each of 32 groups spaced 0x18 bytes apart */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	/* system aperture spans vram_start..vram_end, in 4KB units (>> 12) */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: vram_end >> 24 in the upper 16 bits, vram_start >> 24 in the lower */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture: base 0 with TOP and BOT set to the same value */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5365 
5366 /**
5367  * cik_mc_init - initialize the memory controller driver params
5368  *
5369  * @rdev: radeon_device pointer
5370  *
5371  * Look up the amount of vram, vram width, and decide how to place
5372  * vram and gart within the GPU's physical address space (CIK).
5373  * Returns 0 for success.
5374  */
5375 static int cik_mc_init(struct radeon_device *rdev)
5376 {
5377 	u32 tmp;
5378 	int chansize, numchan;
5379 
5380 	/* Get VRAM informations */
5381 	rdev->mc.vram_is_ddr = true;
5382 	tmp = RREG32(MC_ARB_RAMCFG);
5383 	if (tmp & CHANSIZE_MASK) {
5384 		chansize = 64;
5385 	} else {
5386 		chansize = 32;
5387 	}
5388 	tmp = RREG32(MC_SHARED_CHMAP);
5389 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5390 	case 0:
5391 	default:
5392 		numchan = 1;
5393 		break;
5394 	case 1:
5395 		numchan = 2;
5396 		break;
5397 	case 2:
5398 		numchan = 4;
5399 		break;
5400 	case 3:
5401 		numchan = 8;
5402 		break;
5403 	case 4:
5404 		numchan = 3;
5405 		break;
5406 	case 5:
5407 		numchan = 6;
5408 		break;
5409 	case 6:
5410 		numchan = 10;
5411 		break;
5412 	case 7:
5413 		numchan = 12;
5414 		break;
5415 	case 8:
5416 		numchan = 16;
5417 		break;
5418 	}
5419 	rdev->mc.vram_width = numchan * chansize;
5420 	/* Could aper size report 0 ? */
5421 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5422 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5423 	/* size in MB on si */
5424 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5425 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5426 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5427 	si_vram_gtt_location(rdev, &rdev->mc);
5428 	radeon_update_bandwidth_info(rdev);
5429 
5430 	return 0;
5431 }
5432 
5433 /*
5434  * GART
5435  * VMID 0 is the physical GPU addresses as used by the kernel.
5436  * VMIDs 1-15 are used for userspace clients and are handled
5437  * by the radeon vm/hsa code.
5438  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 * The HDP cache is flushed first, then an invalidate is requested for
 * VM context 0 only.
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	/* 0x1 selects context 0 only */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5454 
5455 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5456 {
5457 	int i;
5458 	uint32_t sh_mem_bases, sh_mem_config;
5459 
5460 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5461 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5462 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5463 
5464 	mutex_lock(&rdev->srbm_mutex);
5465 	for (i = 8; i < 16; i++) {
5466 		cik_srbm_select(rdev, 0, 0, 0, i);
5467 		/* CP and shaders */
5468 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5469 		WREG32(SH_MEM_APE1_BASE, 1);
5470 		WREG32(SH_MEM_APE1_LIMIT, 0);
5471 		WREG32(SH_MEM_BASES, sh_mem_bases);
5472 	}
5473 	cik_srbm_select(rdev, 0, 0, 0, 0);
5474 	mutex_unlock(&rdev->srbm_mutex);
5475 }
5476 
5477 /**
5478  * cik_pcie_gart_enable - gart enable
5479  *
5480  * @rdev: radeon_device pointer
5481  *
5482  * This sets up the TLBs, programs the page tables for VMID0,
5483  * sets up the hw for VMIDs 1-15 which are allocated on
5484  * demand, and sets up the global locations for the LDS, GDS,
5485  * and GPUVM for FSA64 clients (CIK).
5486  * Returns 0 for success, errors for failure.
5487  */
5488 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5489 {
5490 	int r, i;
5491 
5492 	if (rdev->gart.robj == NULL) {
5493 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5494 		return -EINVAL;
5495 	}
5496 	r = radeon_gart_table_vram_pin(rdev);
5497 	if (r)
5498 		return r;
5499 	/* Setup TLB control */
5500 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5501 	       (0xA << 7) |
5502 	       ENABLE_L1_TLB |
5503 	       ENABLE_L1_FRAGMENT_PROCESSING |
5504 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5505 	       ENABLE_ADVANCED_DRIVER_MODEL |
5506 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5507 	/* Setup L2 cache */
5508 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5509 	       ENABLE_L2_FRAGMENT_PROCESSING |
5510 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5511 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5512 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5513 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5514 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5515 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5516 	       BANK_SELECT(4) |
5517 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5518 	/* setup context0 */
5519 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5520 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5521 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5522 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5523 			(u32)(rdev->dummy_page.addr >> 12));
5524 	WREG32(VM_CONTEXT0_CNTL2, 0);
5525 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5526 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5527 
5528 	WREG32(0x15D4, 0);
5529 	WREG32(0x15D8, 0);
5530 	WREG32(0x15DC, 0);
5531 
5532 	/* restore context1-15 */
5533 	/* set vm size, must be a multiple of 4 */
5534 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5535 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5536 	for (i = 1; i < 16; i++) {
5537 		if (i < 8)
5538 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5539 			       rdev->vm_manager.saved_table_addr[i]);
5540 		else
5541 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5542 			       rdev->vm_manager.saved_table_addr[i]);
5543 	}
5544 
5545 	/* enable context1-15 */
5546 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5547 	       (u32)(rdev->dummy_page.addr >> 12));
5548 	WREG32(VM_CONTEXT1_CNTL2, 4);
5549 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5550 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5551 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5552 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5553 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5554 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5555 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5556 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5557 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5558 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5559 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5560 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5561 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5562 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5563 
5564 	if (rdev->family == CHIP_KAVERI) {
5565 		u32 tmp = RREG32(CHUB_CONTROL);
5566 		tmp &= ~BYPASS_VM;
5567 		WREG32(CHUB_CONTROL, tmp);
5568 	}
5569 
5570 	/* XXX SH_MEM regs */
5571 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5572 	mutex_lock(&rdev->srbm_mutex);
5573 	for (i = 0; i < 16; i++) {
5574 		cik_srbm_select(rdev, 0, 0, 0, i);
5575 		/* CP and shaders */
5576 		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5577 		WREG32(SH_MEM_APE1_BASE, 1);
5578 		WREG32(SH_MEM_APE1_LIMIT, 0);
5579 		WREG32(SH_MEM_BASES, 0);
5580 		/* SDMA GFX */
5581 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5582 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5583 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5584 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5585 		/* XXX SDMA RLC - todo */
5586 	}
5587 	cik_srbm_select(rdev, 0, 0, 0, 0);
5588 	mutex_unlock(&rdev->srbm_mutex);
5589 
5590 	cik_pcie_init_compute_vmid(rdev);
5591 
5592 	cik_pcie_gart_tlb_flush(rdev);
5593 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5594 		 (unsigned)(rdev->mc.gtt_size >> 20),
5595 		 (unsigned long long)rdev->gart.table_addr);
5596 	rdev->gart.ready = true;
5597 	return 0;
5598 }
5599 
5600 /**
5601  * cik_pcie_gart_disable - gart disable
5602  *
5603  * @rdev: radeon_device pointer
5604  *
5605  * This disables all VM page table (CIK).
5606  */
5607 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5608 {
5609 	unsigned i;
5610 
5611 	for (i = 1; i < 16; ++i) {
5612 		uint32_t reg;
5613 		if (i < 8)
5614 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5615 		else
5616 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5617 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5618 	}
5619 
5620 	/* Disable all tables */
5621 	WREG32(VM_CONTEXT0_CNTL, 0);
5622 	WREG32(VM_CONTEXT1_CNTL, 0);
5623 	/* Setup TLB control */
5624 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5625 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5626 	/* Setup L2 cache */
5627 	WREG32(VM_L2_CNTL,
5628 	       ENABLE_L2_FRAGMENT_PROCESSING |
5629 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5630 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5631 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5632 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5633 	WREG32(VM_L2_CNTL2, 0);
5634 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5635 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5636 	radeon_gart_table_vram_unpin(rdev);
5637 }
5638 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, then frees the VRAM-backed GART
 * table and the common GART state (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* the hardware is disabled first, before the table is freed */
	cik_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5652 
5653 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK checks IBs in hardware, so there is nothing to parse here and
 * neither argument is looked at (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nop: hw IB checking */
	return 0;
}
5666 
5667 /*
5668  * vm
5669  * VMID 0 is the physical GPU addresses as used by the kernel.
5670  * VMIDs 1-15 are used for userspace clients and are handled
5671  * by the radeon vm/hsa code.
5672  */
5673 /**
5674  * cik_vm_init - cik vm init callback
5675  *
5676  * @rdev: radeon_device pointer
5677  *
5678  * Inits cik specific vm parameters (number of VMs, base of vram for
5679  * VMIDs 1-15) (CIK).
5680  * Returns 0 for success.
5681  */
5682 int cik_vm_init(struct radeon_device *rdev)
5683 {
5684 	/*
5685 	 * number of VMs
5686 	 * VMID 0 is reserved for System
5687 	 * radeon graphics/compute will use VMIDs 1-7
5688 	 * amdkfd will use VMIDs 8-15
5689 	 */
5690 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5691 	/* base offset of vram pages */
5692 	if (rdev->flags & RADEON_IS_IGP) {
5693 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5694 		tmp <<= 22;
5695 		rdev->vm_manager.vram_base_offset = tmp;
5696 	} else
5697 		rdev->vm_manager.vram_base_offset = 0;
5698 
5699 	return 0;
5700 }
5701 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Currently there is nothing to tear down.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty */
}
5712 
5713 /**
5714  * cik_vm_decode_fault - print human readable fault info
5715  *
5716  * @rdev: radeon_device pointer
5717  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5718  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5719  *
5720  * Print human readable fault information (CIK).
5721  */
5722 static void cik_vm_decode_fault(struct radeon_device *rdev,
5723 				u32 status, u32 addr, u32 mc_client)
5724 {
5725 	u32 mc_id;
5726 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5727 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5728 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5729 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5730 
5731 	if (rdev->family == CHIP_HAWAII)
5732 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5733 	else
5734 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5735 
5736 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5737 	       protections, vmid, addr,
5738 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5739 	       block, mc_client, mc_id);
5740 }
5741 
5742 /**
5743  * cik_vm_flush - cik vm flush using the CP
5744  *
5745  * @rdev: radeon_device pointer
5746  *
5747  * Update the page table base and flush the VM TLB
5748  * using the CP (CIK).
5749  */
5750 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5751 		  unsigned vm_id, uint64_t pd_addr)
5752 {
5753 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5754 
5755 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5756 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5757 				 WRITE_DATA_DST_SEL(0)));
5758 	if (vm_id < 8) {
5759 		radeon_ring_write(ring,
5760 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5761 	} else {
5762 		radeon_ring_write(ring,
5763 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5764 	}
5765 	radeon_ring_write(ring, 0);
5766 	radeon_ring_write(ring, pd_addr >> 12);
5767 
5768 	/* update SH_MEM_* regs */
5769 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5770 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5771 				 WRITE_DATA_DST_SEL(0)));
5772 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5773 	radeon_ring_write(ring, 0);
5774 	radeon_ring_write(ring, VMID(vm_id));
5775 
5776 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5777 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5778 				 WRITE_DATA_DST_SEL(0)));
5779 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5780 	radeon_ring_write(ring, 0);
5781 
5782 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5783 	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5784 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5785 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5786 
5787 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5788 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5789 				 WRITE_DATA_DST_SEL(0)));
5790 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5791 	radeon_ring_write(ring, 0);
5792 	radeon_ring_write(ring, VMID(0));
5793 
5794 	/* HDP flush */
5795 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5796 
5797 	/* bits 0-15 are the VM contexts0-15 */
5798 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5799 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5800 				 WRITE_DATA_DST_SEL(0)));
5801 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5802 	radeon_ring_write(ring, 0);
5803 	radeon_ring_write(ring, 1 << vm_id);
5804 
5805 	/* wait for the invalidate to complete */
5806 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5807 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5808 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5809 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5810 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5811 	radeon_ring_write(ring, 0);
5812 	radeon_ring_write(ring, 0); /* ref */
5813 	radeon_ring_write(ring, 0); /* mask */
5814 	radeon_ring_write(ring, 0x20); /* poll interval */
5815 
5816 	/* compute doesn't have PFP */
5817 	if (usepfp) {
5818 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5819 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5820 		radeon_ring_write(ring, 0x0);
5821 	}
5822 }
5823 
5824 /*
5825  * RLC
5826  * The RLC is a multi-purpose microengine that handles a
5827  * variety of functions, the most important of which is
5828  * the interrupt controller.
5829  */
5830 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5831 					  bool enable)
5832 {
5833 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5834 
5835 	if (enable)
5836 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5837 	else
5838 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5839 	WREG32(CP_INT_CNTL_RING0, tmp);
5840 }
5841 
5842 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5843 {
5844 	u32 tmp;
5845 
5846 	tmp = RREG32(RLC_LB_CNTL);
5847 	if (enable)
5848 		tmp |= LOAD_BALANCE_ENABLE;
5849 	else
5850 		tmp &= ~LOAD_BALANCE_ENABLE;
5851 	WREG32(RLC_LB_CNTL, tmp);
5852 }
5853 
5854 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5855 {
5856 	u32 i, j, k;
5857 	u32 mask;
5858 
5859 	mutex_lock(&rdev->grbm_idx_mutex);
5860 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5861 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5862 			cik_select_se_sh(rdev, i, j);
5863 			for (k = 0; k < rdev->usec_timeout; k++) {
5864 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5865 					break;
5866 				udelay(1);
5867 			}
5868 		}
5869 	}
5870 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5871 	mutex_unlock(&rdev->grbm_idx_mutex);
5872 
5873 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5874 	for (k = 0; k < rdev->usec_timeout; k++) {
5875 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5876 			break;
5877 		udelay(1);
5878 	}
5879 }
5880 
5881 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5882 {
5883 	u32 tmp;
5884 
5885 	tmp = RREG32(RLC_CNTL);
5886 	if (tmp != rlc)
5887 		WREG32(RLC_CNTL, rlc);
5888 }
5889 
5890 static u32 cik_halt_rlc(struct radeon_device *rdev)
5891 {
5892 	u32 data, orig;
5893 
5894 	orig = data = RREG32(RLC_CNTL);
5895 
5896 	if (data & RLC_ENABLE) {
5897 		u32 i;
5898 
5899 		data &= ~RLC_ENABLE;
5900 		WREG32(RLC_CNTL, data);
5901 
5902 		for (i = 0; i < rdev->usec_timeout; i++) {
5903 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5904 				break;
5905 			udelay(1);
5906 		}
5907 
5908 		cik_wait_for_rlc_serdes(rdev);
5909 	}
5910 
5911 	return orig;
5912 }
5913 
5914 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5915 {
5916 	u32 tmp, i, mask;
5917 
5918 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5919 	WREG32(RLC_GPR_REG2, tmp);
5920 
5921 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5922 	for (i = 0; i < rdev->usec_timeout; i++) {
5923 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5924 			break;
5925 		udelay(1);
5926 	}
5927 
5928 	for (i = 0; i < rdev->usec_timeout; i++) {
5929 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5930 			break;
5931 		udelay(1);
5932 	}
5933 }
5934 
5935 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5936 {
5937 	u32 tmp;
5938 
5939 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5940 	WREG32(RLC_GPR_REG2, tmp);
5941 }
5942 
5943 /**
5944  * cik_rlc_stop - stop the RLC ME
5945  *
5946  * @rdev: radeon_device pointer
5947  *
5948  * Halt the RLC ME (MicroEngine) (CIK).
5949  */
5950 static void cik_rlc_stop(struct radeon_device *rdev)
5951 {
5952 	WREG32(RLC_CNTL, 0);
5953 
5954 	cik_enable_gui_idle_interrupt(rdev, false);
5955 
5956 	cik_wait_for_rlc_serdes(rdev);
5957 }
5958 
5959 /**
5960  * cik_rlc_start - start the RLC ME
5961  *
5962  * @rdev: radeon_device pointer
5963  *
5964  * Unhalt the RLC ME (MicroEngine) (CIK).
5965  */
5966 static void cik_rlc_start(struct radeon_device *rdev)
5967 {
5968 	WREG32(RLC_CNTL, RLC_ENABLE);
5969 
5970 	cik_enable_gui_idle_interrupt(rdev, true);
5971 
5972 	udelay(50);
5973 }
5974 
5975 /**
5976  * cik_rlc_resume - setup the RLC hw
5977  *
5978  * @rdev: radeon_device pointer
5979  *
5980  * Initialize the RLC registers, load the ucode,
5981  * and start the RLC (CIK).
5982  * Returns 0 for success, -EINVAL if the ucode is not available.
5983  */
5984 static int cik_rlc_resume(struct radeon_device *rdev)
5985 {
5986 	u32 i, size, tmp;
5987 
5988 	if (!rdev->rlc_fw)
5989 		return -EINVAL;
5990 
5991 	cik_rlc_stop(rdev);
5992 
5993 	/* disable CG */
5994 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5995 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5996 
5997 	si_rlc_reset(rdev);
5998 
5999 	cik_init_pg(rdev);
6000 
6001 	cik_init_cg(rdev);
6002 
6003 	WREG32(RLC_LB_CNTR_INIT, 0);
6004 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6005 
6006 	mutex_lock(&rdev->grbm_idx_mutex);
6007 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6008 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6009 	WREG32(RLC_LB_PARAMS, 0x00600408);
6010 	WREG32(RLC_LB_CNTL, 0x80000004);
6011 	mutex_unlock(&rdev->grbm_idx_mutex);
6012 
6013 	WREG32(RLC_MC_CNTL, 0);
6014 	WREG32(RLC_UCODE_CNTL, 0);
6015 
6016 	if (rdev->new_fw) {
6017 		const struct rlc_firmware_header_v1_0 *hdr =
6018 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6019 		const __le32 *fw_data = (const __le32 *)
6020 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6021 
6022 		radeon_ucode_print_rlc_hdr(&hdr->header);
6023 
6024 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6025 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6026 		for (i = 0; i < size; i++)
6027 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6028 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6029 	} else {
6030 		const __be32 *fw_data;
6031 
6032 		switch (rdev->family) {
6033 		case CHIP_BONAIRE:
6034 		case CHIP_HAWAII:
6035 		default:
6036 			size = BONAIRE_RLC_UCODE_SIZE;
6037 			break;
6038 		case CHIP_KAVERI:
6039 			size = KV_RLC_UCODE_SIZE;
6040 			break;
6041 		case CHIP_KABINI:
6042 			size = KB_RLC_UCODE_SIZE;
6043 			break;
6044 		case CHIP_MULLINS:
6045 			size = ML_RLC_UCODE_SIZE;
6046 			break;
6047 		}
6048 
6049 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6050 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6051 		for (i = 0; i < size; i++)
6052 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6053 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6054 	}
6055 
6056 	/* XXX - find out what chips support lbpw */
6057 	cik_enable_lbpw(rdev, false);
6058 
6059 	if (rdev->family == CHIP_BONAIRE)
6060 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6061 
6062 	cik_rlc_start(rdev);
6063 
6064 	return 0;
6065 }
6066 
6067 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6068 {
6069 	u32 data, orig, tmp, tmp2;
6070 
6071 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6072 
6073 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6074 		cik_enable_gui_idle_interrupt(rdev, true);
6075 
6076 		tmp = cik_halt_rlc(rdev);
6077 
6078 		mutex_lock(&rdev->grbm_idx_mutex);
6079 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6080 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6081 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6082 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6083 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6084 		mutex_unlock(&rdev->grbm_idx_mutex);
6085 
6086 		cik_update_rlc(rdev, tmp);
6087 
6088 		data |= CGCG_EN | CGLS_EN;
6089 	} else {
6090 		cik_enable_gui_idle_interrupt(rdev, false);
6091 
6092 		RREG32(CB_CGTT_SCLK_CTRL);
6093 		RREG32(CB_CGTT_SCLK_CTRL);
6094 		RREG32(CB_CGTT_SCLK_CTRL);
6095 		RREG32(CB_CGTT_SCLK_CTRL);
6096 
6097 		data &= ~(CGCG_EN | CGLS_EN);
6098 	}
6099 
6100 	if (orig != data)
6101 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6102 
6103 }
6104 
6105 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6106 {
6107 	u32 data, orig, tmp = 0;
6108 
6109 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6110 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6111 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6112 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6113 				data |= CP_MEM_LS_EN;
6114 				if (orig != data)
6115 					WREG32(CP_MEM_SLP_CNTL, data);
6116 			}
6117 		}
6118 
6119 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6120 		data |= 0x00000001;
6121 		data &= 0xfffffffd;
6122 		if (orig != data)
6123 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6124 
6125 		tmp = cik_halt_rlc(rdev);
6126 
6127 		mutex_lock(&rdev->grbm_idx_mutex);
6128 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6129 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6130 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6131 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6132 		WREG32(RLC_SERDES_WR_CTRL, data);
6133 		mutex_unlock(&rdev->grbm_idx_mutex);
6134 
6135 		cik_update_rlc(rdev, tmp);
6136 
6137 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6138 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6139 			data &= ~SM_MODE_MASK;
6140 			data |= SM_MODE(0x2);
6141 			data |= SM_MODE_ENABLE;
6142 			data &= ~CGTS_OVERRIDE;
6143 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6144 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6145 				data &= ~CGTS_LS_OVERRIDE;
6146 			data &= ~ON_MONITOR_ADD_MASK;
6147 			data |= ON_MONITOR_ADD_EN;
6148 			data |= ON_MONITOR_ADD(0x96);
6149 			if (orig != data)
6150 				WREG32(CGTS_SM_CTRL_REG, data);
6151 		}
6152 	} else {
6153 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6154 		data |= 0x00000003;
6155 		if (orig != data)
6156 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6157 
6158 		data = RREG32(RLC_MEM_SLP_CNTL);
6159 		if (data & RLC_MEM_LS_EN) {
6160 			data &= ~RLC_MEM_LS_EN;
6161 			WREG32(RLC_MEM_SLP_CNTL, data);
6162 		}
6163 
6164 		data = RREG32(CP_MEM_SLP_CNTL);
6165 		if (data & CP_MEM_LS_EN) {
6166 			data &= ~CP_MEM_LS_EN;
6167 			WREG32(CP_MEM_SLP_CNTL, data);
6168 		}
6169 
6170 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6171 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6172 		if (orig != data)
6173 			WREG32(CGTS_SM_CTRL_REG, data);
6174 
6175 		tmp = cik_halt_rlc(rdev);
6176 
6177 		mutex_lock(&rdev->grbm_idx_mutex);
6178 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6179 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6180 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6181 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6182 		WREG32(RLC_SERDES_WR_CTRL, data);
6183 		mutex_unlock(&rdev->grbm_idx_mutex);
6184 
6185 		cik_update_rlc(rdev, tmp);
6186 	}
6187 }
6188 
/*
 * Memory controller clock gating registers.  cik_enable_mc_ls() and
 * cik_enable_mc_mgcg() walk this table and toggle MC_LS_ENABLE and
 * MC_CG_ENABLE, respectively, in every register listed here.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6201 
6202 static void cik_enable_mc_ls(struct radeon_device *rdev,
6203 			     bool enable)
6204 {
6205 	int i;
6206 	u32 orig, data;
6207 
6208 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6209 		orig = data = RREG32(mc_cg_registers[i]);
6210 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6211 			data |= MC_LS_ENABLE;
6212 		else
6213 			data &= ~MC_LS_ENABLE;
6214 		if (data != orig)
6215 			WREG32(mc_cg_registers[i], data);
6216 	}
6217 }
6218 
6219 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6220 			       bool enable)
6221 {
6222 	int i;
6223 	u32 orig, data;
6224 
6225 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6226 		orig = data = RREG32(mc_cg_registers[i]);
6227 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6228 			data |= MC_CG_ENABLE;
6229 		else
6230 			data &= ~MC_CG_ENABLE;
6231 		if (data != orig)
6232 			WREG32(mc_cg_registers[i], data);
6233 	}
6234 }
6235 
6236 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6237 				 bool enable)
6238 {
6239 	u32 orig, data;
6240 
6241 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6242 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6243 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6244 	} else {
6245 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6246 		data |= 0xff000000;
6247 		if (data != orig)
6248 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6249 
6250 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6251 		data |= 0xff000000;
6252 		if (data != orig)
6253 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6254 	}
6255 }
6256 
6257 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6258 				 bool enable)
6259 {
6260 	u32 orig, data;
6261 
6262 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6263 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6264 		data |= 0x100;
6265 		if (orig != data)
6266 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6267 
6268 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6269 		data |= 0x100;
6270 		if (orig != data)
6271 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6272 	} else {
6273 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6274 		data &= ~0x100;
6275 		if (orig != data)
6276 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6277 
6278 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6279 		data &= ~0x100;
6280 		if (orig != data)
6281 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6282 	}
6283 }
6284 
6285 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6286 				bool enable)
6287 {
6288 	u32 orig, data;
6289 
6290 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6291 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6292 		data = 0xfff;
6293 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6294 
6295 		orig = data = RREG32(UVD_CGC_CTRL);
6296 		data |= DCM;
6297 		if (orig != data)
6298 			WREG32(UVD_CGC_CTRL, data);
6299 	} else {
6300 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6301 		data &= ~0xfff;
6302 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6303 
6304 		orig = data = RREG32(UVD_CGC_CTRL);
6305 		data &= ~DCM;
6306 		if (orig != data)
6307 			WREG32(UVD_CGC_CTRL, data);
6308 	}
6309 }
6310 
6311 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6312 			       bool enable)
6313 {
6314 	u32 orig, data;
6315 
6316 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6317 
6318 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6319 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6320 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6321 	else
6322 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6323 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6324 
6325 	if (orig != data)
6326 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6327 }
6328 
6329 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6330 				bool enable)
6331 {
6332 	u32 orig, data;
6333 
6334 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6335 
6336 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6337 		data &= ~CLOCK_GATING_DIS;
6338 	else
6339 		data |= CLOCK_GATING_DIS;
6340 
6341 	if (orig != data)
6342 		WREG32(HDP_HOST_PATH_CNTL, data);
6343 }
6344 
6345 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6346 			      bool enable)
6347 {
6348 	u32 orig, data;
6349 
6350 	orig = data = RREG32(HDP_MEM_POWER_LS);
6351 
6352 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6353 		data |= HDP_LS_ENABLE;
6354 	else
6355 		data &= ~HDP_LS_ENABLE;
6356 
6357 	if (orig != data)
6358 		WREG32(HDP_MEM_POWER_LS, data);
6359 }
6360 
6361 void cik_update_cg(struct radeon_device *rdev,
6362 		   u32 block, bool enable)
6363 {
6364 
6365 	if (block & RADEON_CG_BLOCK_GFX) {
6366 		cik_enable_gui_idle_interrupt(rdev, false);
6367 		/* order matters! */
6368 		if (enable) {
6369 			cik_enable_mgcg(rdev, true);
6370 			cik_enable_cgcg(rdev, true);
6371 		} else {
6372 			cik_enable_cgcg(rdev, false);
6373 			cik_enable_mgcg(rdev, false);
6374 		}
6375 		cik_enable_gui_idle_interrupt(rdev, true);
6376 	}
6377 
6378 	if (block & RADEON_CG_BLOCK_MC) {
6379 		if (!(rdev->flags & RADEON_IS_IGP)) {
6380 			cik_enable_mc_mgcg(rdev, enable);
6381 			cik_enable_mc_ls(rdev, enable);
6382 		}
6383 	}
6384 
6385 	if (block & RADEON_CG_BLOCK_SDMA) {
6386 		cik_enable_sdma_mgcg(rdev, enable);
6387 		cik_enable_sdma_mgls(rdev, enable);
6388 	}
6389 
6390 	if (block & RADEON_CG_BLOCK_BIF) {
6391 		cik_enable_bif_mgls(rdev, enable);
6392 	}
6393 
6394 	if (block & RADEON_CG_BLOCK_UVD) {
6395 		if (rdev->has_uvd)
6396 			cik_enable_uvd_mgcg(rdev, enable);
6397 	}
6398 
6399 	if (block & RADEON_CG_BLOCK_HDP) {
6400 		cik_enable_hdp_mgcg(rdev, enable);
6401 		cik_enable_hdp_ls(rdev, enable);
6402 	}
6403 
6404 	if (block & RADEON_CG_BLOCK_VCE) {
6405 		vce_v2_0_enable_mgcg(rdev, enable);
6406 	}
6407 }
6408 
6409 static void cik_init_cg(struct radeon_device *rdev)
6410 {
6411 
6412 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6413 
6414 	if (rdev->has_uvd)
6415 		si_init_uvd_internal_cg(rdev);
6416 
6417 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6418 			     RADEON_CG_BLOCK_SDMA |
6419 			     RADEON_CG_BLOCK_BIF |
6420 			     RADEON_CG_BLOCK_UVD |
6421 			     RADEON_CG_BLOCK_HDP), true);
6422 }
6423 
6424 static void cik_fini_cg(struct radeon_device *rdev)
6425 {
6426 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6427 			     RADEON_CG_BLOCK_SDMA |
6428 			     RADEON_CG_BLOCK_BIF |
6429 			     RADEON_CG_BLOCK_UVD |
6430 			     RADEON_CG_BLOCK_HDP), false);
6431 
6432 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6433 }
6434 
6435 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6436 					  bool enable)
6437 {
6438 	u32 data, orig;
6439 
6440 	orig = data = RREG32(RLC_PG_CNTL);
6441 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6442 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6443 	else
6444 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6445 	if (orig != data)
6446 		WREG32(RLC_PG_CNTL, data);
6447 }
6448 
6449 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6450 					  bool enable)
6451 {
6452 	u32 data, orig;
6453 
6454 	orig = data = RREG32(RLC_PG_CNTL);
6455 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6456 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6457 	else
6458 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6459 	if (orig != data)
6460 		WREG32(RLC_PG_CNTL, data);
6461 }
6462 
6463 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6464 {
6465 	u32 data, orig;
6466 
6467 	orig = data = RREG32(RLC_PG_CNTL);
6468 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6469 		data &= ~DISABLE_CP_PG;
6470 	else
6471 		data |= DISABLE_CP_PG;
6472 	if (orig != data)
6473 		WREG32(RLC_PG_CNTL, data);
6474 }
6475 
6476 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6477 {
6478 	u32 data, orig;
6479 
6480 	orig = data = RREG32(RLC_PG_CNTL);
6481 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6482 		data &= ~DISABLE_GDS_PG;
6483 	else
6484 		data |= DISABLE_GDS_PG;
6485 	if (orig != data)
6486 		WREG32(RLC_PG_CNTL, data);
6487 }
6488 
/*
 * Table layout used by cik_init_cp_pg_table() for legacy (non-new_fw)
 * firmware images.  All three values count 32-bit words: the size is
 * the number of words copied per microengine, and the offsets index
 * the firmware data as an array of __be32.  The MEC image uses its own
 * offset; CE, PFP and ME share CP_ME_TABLE_OFFSET.
 */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6492 
6493 void cik_init_cp_pg_table(struct radeon_device *rdev)
6494 {
6495 	volatile u32 *dst_ptr;
6496 	int me, i, max_me = 4;
6497 	u32 bo_offset = 0;
6498 	u32 table_offset, table_size;
6499 
6500 	if (rdev->family == CHIP_KAVERI)
6501 		max_me = 5;
6502 
6503 	if (rdev->rlc.cp_table_ptr == NULL)
6504 		return;
6505 
6506 	/* write the cp table buffer */
6507 	dst_ptr = rdev->rlc.cp_table_ptr;
6508 	for (me = 0; me < max_me; me++) {
6509 		if (rdev->new_fw) {
6510 			const __le32 *fw_data;
6511 			const struct gfx_firmware_header_v1_0 *hdr;
6512 
6513 			if (me == 0) {
6514 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6515 				fw_data = (const __le32 *)
6516 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6517 				table_offset = le32_to_cpu(hdr->jt_offset);
6518 				table_size = le32_to_cpu(hdr->jt_size);
6519 			} else if (me == 1) {
6520 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6521 				fw_data = (const __le32 *)
6522 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6523 				table_offset = le32_to_cpu(hdr->jt_offset);
6524 				table_size = le32_to_cpu(hdr->jt_size);
6525 			} else if (me == 2) {
6526 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6527 				fw_data = (const __le32 *)
6528 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6529 				table_offset = le32_to_cpu(hdr->jt_offset);
6530 				table_size = le32_to_cpu(hdr->jt_size);
6531 			} else if (me == 3) {
6532 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6533 				fw_data = (const __le32 *)
6534 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6535 				table_offset = le32_to_cpu(hdr->jt_offset);
6536 				table_size = le32_to_cpu(hdr->jt_size);
6537 			} else {
6538 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6539 				fw_data = (const __le32 *)
6540 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6541 				table_offset = le32_to_cpu(hdr->jt_offset);
6542 				table_size = le32_to_cpu(hdr->jt_size);
6543 			}
6544 
6545 			for (i = 0; i < table_size; i ++) {
6546 				dst_ptr[bo_offset + i] =
6547 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6548 			}
6549 			bo_offset += table_size;
6550 		} else {
6551 			const __be32 *fw_data;
6552 			table_size = CP_ME_TABLE_SIZE;
6553 
6554 			if (me == 0) {
6555 				fw_data = (const __be32 *)rdev->ce_fw->data;
6556 				table_offset = CP_ME_TABLE_OFFSET;
6557 			} else if (me == 1) {
6558 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6559 				table_offset = CP_ME_TABLE_OFFSET;
6560 			} else if (me == 2) {
6561 				fw_data = (const __be32 *)rdev->me_fw->data;
6562 				table_offset = CP_ME_TABLE_OFFSET;
6563 			} else {
6564 				fw_data = (const __be32 *)rdev->mec_fw->data;
6565 				table_offset = CP_MEC_TABLE_OFFSET;
6566 			}
6567 
6568 			for (i = 0; i < table_size; i ++) {
6569 				dst_ptr[bo_offset + i] =
6570 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6571 			}
6572 			bo_offset += table_size;
6573 		}
6574 	}
6575 }
6576 
6577 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6578 				bool enable)
6579 {
6580 	u32 data, orig;
6581 
6582 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6583 		orig = data = RREG32(RLC_PG_CNTL);
6584 		data |= GFX_PG_ENABLE;
6585 		if (orig != data)
6586 			WREG32(RLC_PG_CNTL, data);
6587 
6588 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6589 		data |= AUTO_PG_EN;
6590 		if (orig != data)
6591 			WREG32(RLC_AUTO_PG_CTRL, data);
6592 	} else {
6593 		orig = data = RREG32(RLC_PG_CNTL);
6594 		data &= ~GFX_PG_ENABLE;
6595 		if (orig != data)
6596 			WREG32(RLC_PG_CNTL, data);
6597 
6598 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6599 		data &= ~AUTO_PG_EN;
6600 		if (orig != data)
6601 			WREG32(RLC_AUTO_PG_CTRL, data);
6602 
6603 		data = RREG32(DB_RENDER_CONTROL);
6604 	}
6605 }
6606 
6607 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6608 {
6609 	u32 mask = 0, tmp, tmp1;
6610 	int i;
6611 
6612 	mutex_lock(&rdev->grbm_idx_mutex);
6613 	cik_select_se_sh(rdev, se, sh);
6614 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6615 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6616 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6617 	mutex_unlock(&rdev->grbm_idx_mutex);
6618 
6619 	tmp &= 0xffff0000;
6620 
6621 	tmp |= tmp1;
6622 	tmp >>= 16;
6623 
6624 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6625 		mask <<= 1;
6626 		mask |= 1;
6627 	}
6628 
6629 	return (~tmp) & mask;
6630 }
6631 
6632 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6633 {
6634 	u32 i, j, k, active_cu_number = 0;
6635 	u32 mask, counter, cu_bitmap;
6636 	u32 tmp = 0;
6637 
6638 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6639 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6640 			mask = 1;
6641 			cu_bitmap = 0;
6642 			counter = 0;
6643 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6644 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6645 					if (counter < 2)
6646 						cu_bitmap |= mask;
6647 					counter ++;
6648 				}
6649 				mask <<= 1;
6650 			}
6651 
6652 			active_cu_number += counter;
6653 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6654 		}
6655 	}
6656 
6657 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6658 
6659 	tmp = RREG32(RLC_MAX_PG_CU);
6660 	tmp &= ~MAX_PU_CU_MASK;
6661 	tmp |= MAX_PU_CU(active_cu_number);
6662 	WREG32(RLC_MAX_PG_CU, tmp);
6663 }
6664 
6665 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6666 				       bool enable)
6667 {
6668 	u32 data, orig;
6669 
6670 	orig = data = RREG32(RLC_PG_CNTL);
6671 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6672 		data |= STATIC_PER_CU_PG_ENABLE;
6673 	else
6674 		data &= ~STATIC_PER_CU_PG_ENABLE;
6675 	if (orig != data)
6676 		WREG32(RLC_PG_CNTL, data);
6677 }
6678 
6679 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6680 					bool enable)
6681 {
6682 	u32 data, orig;
6683 
6684 	orig = data = RREG32(RLC_PG_CNTL);
6685 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6686 		data |= DYN_PER_CU_PG_ENABLE;
6687 	else
6688 		data &= ~DYN_PER_CU_PG_ENABLE;
6689 	if (orig != data)
6690 		WREG32(RLC_PG_CNTL, data);
6691 }
6692 
6693 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6694 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6695 
6696 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6697 {
6698 	u32 data, orig;
6699 	u32 i;
6700 
6701 	if (rdev->rlc.cs_data) {
6702 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6703 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6704 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6705 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6706 	} else {
6707 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6708 		for (i = 0; i < 3; i++)
6709 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6710 	}
6711 	if (rdev->rlc.reg_list) {
6712 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6713 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6714 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6715 	}
6716 
6717 	orig = data = RREG32(RLC_PG_CNTL);
6718 	data |= GFX_PG_SRC;
6719 	if (orig != data)
6720 		WREG32(RLC_PG_CNTL, data);
6721 
6722 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6723 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6724 
6725 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6726 	data &= ~IDLE_POLL_COUNT_MASK;
6727 	data |= IDLE_POLL_COUNT(0x60);
6728 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6729 
6730 	data = 0x10101010;
6731 	WREG32(RLC_PG_DELAY, data);
6732 
6733 	data = RREG32(RLC_PG_DELAY_2);
6734 	data &= ~0xff;
6735 	data |= 0x3;
6736 	WREG32(RLC_PG_DELAY_2, data);
6737 
6738 	data = RREG32(RLC_AUTO_PG_CTRL);
6739 	data &= ~GRBM_REG_SGIT_MASK;
6740 	data |= GRBM_REG_SGIT(0x700);
6741 	WREG32(RLC_AUTO_PG_CTRL, data);
6742 
6743 }
6744 
6745 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6746 {
6747 	cik_enable_gfx_cgpg(rdev, enable);
6748 	cik_enable_gfx_static_mgpg(rdev, enable);
6749 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6750 }
6751 
6752 u32 cik_get_csb_size(struct radeon_device *rdev)
6753 {
6754 	u32 count = 0;
6755 	const struct cs_section_def *sect = NULL;
6756 	const struct cs_extent_def *ext = NULL;
6757 
6758 	if (rdev->rlc.cs_data == NULL)
6759 		return 0;
6760 
6761 	/* begin clear state */
6762 	count += 2;
6763 	/* context control state */
6764 	count += 3;
6765 
6766 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6767 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6768 			if (sect->id == SECT_CONTEXT)
6769 				count += 2 + ext->reg_count;
6770 			else
6771 				return 0;
6772 		}
6773 	}
6774 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6775 	count += 4;
6776 	/* end clear state */
6777 	count += 2;
6778 	/* clear state */
6779 	count += 2;
6780 
6781 	return count;
6782 }
6783 
6784 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6785 {
6786 	u32 count = 0, i;
6787 	const struct cs_section_def *sect = NULL;
6788 	const struct cs_extent_def *ext = NULL;
6789 
6790 	if (rdev->rlc.cs_data == NULL)
6791 		return;
6792 	if (buffer == NULL)
6793 		return;
6794 
6795 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6796 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6797 
6798 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6799 	buffer[count++] = cpu_to_le32(0x80000000);
6800 	buffer[count++] = cpu_to_le32(0x80000000);
6801 
6802 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6803 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6804 			if (sect->id == SECT_CONTEXT) {
6805 				buffer[count++] =
6806 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6807 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6808 				for (i = 0; i < ext->reg_count; i++)
6809 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6810 			} else {
6811 				return;
6812 			}
6813 		}
6814 	}
6815 
6816 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6817 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6818 	switch (rdev->family) {
6819 	case CHIP_BONAIRE:
6820 		buffer[count++] = cpu_to_le32(0x16000012);
6821 		buffer[count++] = cpu_to_le32(0x00000000);
6822 		break;
6823 	case CHIP_KAVERI:
6824 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6825 		buffer[count++] = cpu_to_le32(0x00000000);
6826 		break;
6827 	case CHIP_KABINI:
6828 	case CHIP_MULLINS:
6829 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6830 		buffer[count++] = cpu_to_le32(0x00000000);
6831 		break;
6832 	case CHIP_HAWAII:
6833 		buffer[count++] = cpu_to_le32(0x3a00161a);
6834 		buffer[count++] = cpu_to_le32(0x0000002e);
6835 		break;
6836 	default:
6837 		buffer[count++] = cpu_to_le32(0x00000000);
6838 		buffer[count++] = cpu_to_le32(0x00000000);
6839 		break;
6840 	}
6841 
6842 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6843 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6844 
6845 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6846 	buffer[count++] = cpu_to_le32(0);
6847 }
6848 
6849 static void cik_init_pg(struct radeon_device *rdev)
6850 {
6851 	if (rdev->pg_flags) {
6852 		cik_enable_sck_slowdown_on_pu(rdev, true);
6853 		cik_enable_sck_slowdown_on_pd(rdev, true);
6854 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6855 			cik_init_gfx_cgpg(rdev);
6856 			cik_enable_cp_pg(rdev, true);
6857 			cik_enable_gds_pg(rdev, true);
6858 		}
6859 		cik_init_ao_cu_mask(rdev);
6860 		cik_update_gfx_pg(rdev, true);
6861 	}
6862 }
6863 
6864 static void cik_fini_pg(struct radeon_device *rdev)
6865 {
6866 	if (rdev->pg_flags) {
6867 		cik_update_gfx_pg(rdev, false);
6868 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6869 			cik_enable_cp_pg(rdev, false);
6870 			cik_enable_gds_pg(rdev, false);
6871 		}
6872 	}
6873 }
6874 
6875 /*
6876  * Interrupts
6877  * Starting with r6xx, interrupts are handled via a ring buffer.
6878  * Ring buffers are areas of GPU accessible memory that the GPU
6879  * writes interrupt vectors into and the host reads vectors out of.
6880  * There is a rptr (read pointer) that determines where the
6881  * host is currently reading, and a wptr (write pointer)
6882  * which determines where the GPU has written.  When the
6883  * pointers are equal, the ring is idle.  When the GPU
6884  * writes vectors to the ring buffer, it increments the
6885  * wptr.  When there is an interrupt, the host then starts
6886  * fetching commands and processing them until the pointers are
6887  * equal again at which point it updates the rptr.
6888  */
6889 
6890 /**
6891  * cik_enable_interrupts - Enable the interrupt ring buffer
6892  *
6893  * @rdev: radeon_device pointer
6894  *
6895  * Enable the interrupt ring buffer (CIK).
6896  */
6897 static void cik_enable_interrupts(struct radeon_device *rdev)
6898 {
6899 	u32 ih_cntl = RREG32(IH_CNTL);
6900 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6901 
6902 	ih_cntl |= ENABLE_INTR;
6903 	ih_rb_cntl |= IH_RB_ENABLE;
6904 	WREG32(IH_CNTL, ih_cntl);
6905 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6906 	rdev->ih.enabled = true;
6907 }
6908 
6909 /**
6910  * cik_disable_interrupts - Disable the interrupt ring buffer
6911  *
6912  * @rdev: radeon_device pointer
6913  *
6914  * Disable the interrupt ring buffer (CIK).
6915  */
6916 static void cik_disable_interrupts(struct radeon_device *rdev)
6917 {
6918 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6919 	u32 ih_cntl = RREG32(IH_CNTL);
6920 
6921 	ih_rb_cntl &= ~IH_RB_ENABLE;
6922 	ih_cntl &= ~ENABLE_INTR;
6923 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6924 	WREG32(IH_CNTL, ih_cntl);
6925 	/* set rptr, wptr to 0 */
6926 	WREG32(IH_RB_RPTR, 0);
6927 	WREG32(IH_RB_WPTR, 0);
6928 	rdev->ih.enabled = false;
6929 	rdev->ih.rptr = 0;
6930 }
6931 
6932 /**
6933  * cik_disable_interrupt_state - Disable all interrupt sources
6934  *
6935  * @rdev: radeon_device pointer
6936  *
6937  * Clear all interrupt enable bits used by the driver (CIK).
6938  */
6939 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6940 {
6941 	u32 tmp;
6942 
6943 	/* gfx ring */
6944 	tmp = RREG32(CP_INT_CNTL_RING0) &
6945 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6946 	WREG32(CP_INT_CNTL_RING0, tmp);
6947 	/* sdma */
6948 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6949 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6950 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6951 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6952 	/* compute queues */
6953 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6954 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6955 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6956 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6957 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6958 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6959 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6960 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6961 	/* grbm */
6962 	WREG32(GRBM_INT_CNTL, 0);
6963 	/* SRBM */
6964 	WREG32(SRBM_INT_CNTL, 0);
6965 	/* vline/vblank, etc. */
6966 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6967 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6968 	if (rdev->num_crtc >= 4) {
6969 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6970 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6971 	}
6972 	if (rdev->num_crtc >= 6) {
6973 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6974 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6975 	}
6976 	/* pflip */
6977 	if (rdev->num_crtc >= 2) {
6978 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6979 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6980 	}
6981 	if (rdev->num_crtc >= 4) {
6982 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6983 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6984 	}
6985 	if (rdev->num_crtc >= 6) {
6986 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6987 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6988 	}
6989 
6990 	/* dac hotplug */
6991 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6992 
6993 	/* digital hotplug */
6994 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6995 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6996 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6997 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6998 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6999 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7000 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7001 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7002 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7003 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7004 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7005 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7006 
7007 }
7008 
7009 /**
7010  * cik_irq_init - init and enable the interrupt ring
7011  *
7012  * @rdev: radeon_device pointer
7013  *
7014  * Allocate a ring buffer for the interrupt controller,
7015  * enable the RLC, disable interrupts, enable the IH
7016  * ring buffer and enable it (CIK).
7017  * Called at device load and reume.
7018  * Returns 0 for success, errors for failure.
7019  */
7020 static int cik_irq_init(struct radeon_device *rdev)
7021 {
7022 	int ret = 0;
7023 	int rb_bufsz;
7024 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7025 
7026 	/* allocate ring */
7027 	ret = r600_ih_ring_alloc(rdev);
7028 	if (ret)
7029 		return ret;
7030 
7031 	/* disable irqs */
7032 	cik_disable_interrupts(rdev);
7033 
7034 	/* init rlc */
7035 	ret = cik_rlc_resume(rdev);
7036 	if (ret) {
7037 		r600_ih_ring_fini(rdev);
7038 		return ret;
7039 	}
7040 
7041 	/* setup interrupt control */
7042 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7043 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7044 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7045 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7046 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7047 	 */
7048 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7049 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7050 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7051 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7052 
7053 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7054 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7055 
7056 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7057 		      IH_WPTR_OVERFLOW_CLEAR |
7058 		      (rb_bufsz << 1));
7059 
7060 	if (rdev->wb.enabled)
7061 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7062 
7063 	/* set the writeback address whether it's enabled or not */
7064 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7065 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7066 
7067 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7068 
7069 	/* set rptr, wptr to 0 */
7070 	WREG32(IH_RB_RPTR, 0);
7071 	WREG32(IH_RB_WPTR, 0);
7072 
7073 	/* Default settings for IH_CNTL (disabled at first) */
7074 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7075 	/* RPTR_REARM only works if msi's are enabled */
7076 	if (rdev->msi_enabled)
7077 		ih_cntl |= RPTR_REARM;
7078 	WREG32(IH_CNTL, ih_cntl);
7079 
7080 	/* force the active interrupt state to all disabled */
7081 	cik_disable_interrupt_state(rdev);
7082 
7083 	pci_set_master(rdev->pdev);
7084 
7085 	/* enable irqs */
7086 	cik_enable_interrupts(rdev);
7087 
7088 	return ret;
7089 }
7090 
7091 /**
7092  * cik_irq_set - enable/disable interrupt sources
7093  *
7094  * @rdev: radeon_device pointer
7095  *
7096  * Enable interrupt sources on the GPU (vblanks, hpd,
7097  * etc.) (CIK).
7098  * Returns 0 for success, errors for failure.
7099  */
7100 int cik_irq_set(struct radeon_device *rdev)
7101 {
7102 	u32 cp_int_cntl;
7103 	u32 cp_m1p0;
7104 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7105 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7106 	u32 grbm_int_cntl = 0;
7107 	u32 dma_cntl, dma_cntl1;
7108 
7109 	if (!rdev->irq.installed) {
7110 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7111 		return -EINVAL;
7112 	}
7113 	/* don't enable anything if the ih is disabled */
7114 	if (!rdev->ih.enabled) {
7115 		cik_disable_interrupts(rdev);
7116 		/* force the active interrupt state to all disabled */
7117 		cik_disable_interrupt_state(rdev);
7118 		return 0;
7119 	}
7120 
7121 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7122 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7123 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7124 
7125 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7126 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7127 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7128 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7129 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7130 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7131 
7132 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7133 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7134 
7135 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7136 
7137 	/* enable CP interrupts on all rings */
7138 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7139 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7140 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7141 	}
7142 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7143 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7144 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7145 		if (ring->me == 1) {
7146 			switch (ring->pipe) {
7147 			case 0:
7148 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7149 				break;
7150 			default:
7151 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7152 				break;
7153 			}
7154 		} else {
7155 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7156 		}
7157 	}
7158 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7159 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7160 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7161 		if (ring->me == 1) {
7162 			switch (ring->pipe) {
7163 			case 0:
7164 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7165 				break;
7166 			default:
7167 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7168 				break;
7169 			}
7170 		} else {
7171 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7172 		}
7173 	}
7174 
7175 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7176 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7177 		dma_cntl |= TRAP_ENABLE;
7178 	}
7179 
7180 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7181 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7182 		dma_cntl1 |= TRAP_ENABLE;
7183 	}
7184 
7185 	if (rdev->irq.crtc_vblank_int[0] ||
7186 	    atomic_read(&rdev->irq.pflip[0])) {
7187 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7188 		crtc1 |= VBLANK_INTERRUPT_MASK;
7189 	}
7190 	if (rdev->irq.crtc_vblank_int[1] ||
7191 	    atomic_read(&rdev->irq.pflip[1])) {
7192 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7193 		crtc2 |= VBLANK_INTERRUPT_MASK;
7194 	}
7195 	if (rdev->irq.crtc_vblank_int[2] ||
7196 	    atomic_read(&rdev->irq.pflip[2])) {
7197 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7198 		crtc3 |= VBLANK_INTERRUPT_MASK;
7199 	}
7200 	if (rdev->irq.crtc_vblank_int[3] ||
7201 	    atomic_read(&rdev->irq.pflip[3])) {
7202 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7203 		crtc4 |= VBLANK_INTERRUPT_MASK;
7204 	}
7205 	if (rdev->irq.crtc_vblank_int[4] ||
7206 	    atomic_read(&rdev->irq.pflip[4])) {
7207 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7208 		crtc5 |= VBLANK_INTERRUPT_MASK;
7209 	}
7210 	if (rdev->irq.crtc_vblank_int[5] ||
7211 	    atomic_read(&rdev->irq.pflip[5])) {
7212 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7213 		crtc6 |= VBLANK_INTERRUPT_MASK;
7214 	}
7215 	if (rdev->irq.hpd[0]) {
7216 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7217 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218 	}
7219 	if (rdev->irq.hpd[1]) {
7220 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7221 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222 	}
7223 	if (rdev->irq.hpd[2]) {
7224 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7225 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226 	}
7227 	if (rdev->irq.hpd[3]) {
7228 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7229 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7230 	}
7231 	if (rdev->irq.hpd[4]) {
7232 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7233 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7234 	}
7235 	if (rdev->irq.hpd[5]) {
7236 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7237 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7238 	}
7239 
7240 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7241 
7242 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7243 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7244 
7245 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7246 
7247 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7248 
7249 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7250 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7251 	if (rdev->num_crtc >= 4) {
7252 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7253 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7254 	}
7255 	if (rdev->num_crtc >= 6) {
7256 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7257 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7258 	}
7259 
7260 	if (rdev->num_crtc >= 2) {
7261 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7262 		       GRPH_PFLIP_INT_MASK);
7263 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7264 		       GRPH_PFLIP_INT_MASK);
7265 	}
7266 	if (rdev->num_crtc >= 4) {
7267 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7268 		       GRPH_PFLIP_INT_MASK);
7269 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7270 		       GRPH_PFLIP_INT_MASK);
7271 	}
7272 	if (rdev->num_crtc >= 6) {
7273 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7274 		       GRPH_PFLIP_INT_MASK);
7275 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7276 		       GRPH_PFLIP_INT_MASK);
7277 	}
7278 
7279 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7280 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7281 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7282 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7283 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7284 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7285 
7286 	/* posting read */
7287 	RREG32(SRBM_STATUS);
7288 
7289 	return 0;
7290 }
7291 
7292 /**
7293  * cik_irq_ack - ack interrupt sources
7294  *
7295  * @rdev: radeon_device pointer
7296  *
7297  * Ack interrupt sources on the GPU (vblanks, hpd,
7298  * etc.) (CIK).  Certain interrupts sources are sw
7299  * generated and do not require an explicit ack.
7300  */
7301 static inline void cik_irq_ack(struct radeon_device *rdev)
7302 {
7303 	u32 tmp;
7304 
7305 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7306 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7307 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7308 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7309 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7310 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7311 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7312 
7313 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7314 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7315 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7316 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7317 	if (rdev->num_crtc >= 4) {
7318 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7319 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7320 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7321 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7322 	}
7323 	if (rdev->num_crtc >= 6) {
7324 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7325 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7326 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7327 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7328 	}
7329 
7330 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7331 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7332 		       GRPH_PFLIP_INT_CLEAR);
7333 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7334 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7335 		       GRPH_PFLIP_INT_CLEAR);
7336 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7337 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7338 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7339 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7340 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7341 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7342 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7343 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7344 
7345 	if (rdev->num_crtc >= 4) {
7346 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7347 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7348 			       GRPH_PFLIP_INT_CLEAR);
7349 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7350 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7351 			       GRPH_PFLIP_INT_CLEAR);
7352 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7353 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7354 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7355 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7356 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7357 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7358 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7359 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7360 	}
7361 
7362 	if (rdev->num_crtc >= 6) {
7363 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7364 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7365 			       GRPH_PFLIP_INT_CLEAR);
7366 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7367 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7368 			       GRPH_PFLIP_INT_CLEAR);
7369 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7370 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7371 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7372 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7373 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7374 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7375 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7376 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7377 	}
7378 
7379 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7380 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7381 		tmp |= DC_HPDx_INT_ACK;
7382 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7383 	}
7384 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7385 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7386 		tmp |= DC_HPDx_INT_ACK;
7387 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7388 	}
7389 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7390 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7391 		tmp |= DC_HPDx_INT_ACK;
7392 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7393 	}
7394 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7395 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7396 		tmp |= DC_HPDx_INT_ACK;
7397 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7398 	}
7399 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7400 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7401 		tmp |= DC_HPDx_INT_ACK;
7402 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7403 	}
7404 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7405 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7406 		tmp |= DC_HPDx_INT_ACK;
7407 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7408 	}
7409 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7410 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7411 		tmp |= DC_HPDx_RX_INT_ACK;
7412 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7413 	}
7414 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7415 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7416 		tmp |= DC_HPDx_RX_INT_ACK;
7417 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7418 	}
7419 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7420 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7421 		tmp |= DC_HPDx_RX_INT_ACK;
7422 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7423 	}
7424 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7425 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7426 		tmp |= DC_HPDx_RX_INT_ACK;
7427 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7428 	}
7429 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7430 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7431 		tmp |= DC_HPDx_RX_INT_ACK;
7432 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7433 	}
7434 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7435 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7436 		tmp |= DC_HPDx_RX_INT_ACK;
7437 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7438 	}
7439 }
7440 
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 * Calls cik_disable_interrupts(), waits 1 ms, acknowledges what is
 * still pending through cik_irq_ack() and finally calls
 * cik_disable_interrupt_state().
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* Turn interrupt generation off first ... */
	cik_disable_interrupts(rdev);

	/* ... then wait 1 ms and acknowledge anything already raised */
	mdelay(1);
	cik_irq_ack(rdev);

	cik_disable_interrupt_state(rdev);
}
7456 
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts through cik_irq_disable() and then
 * stop the RLC with cik_rlc_stop() (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts go quiet before the RLC is stopped */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7470 
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Run the suspend path (cik_irq_suspend()) to disable interrupts
 * on the hw, then release the IH ring buffer with
 * r600_ih_ring_fini() (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw before the IH ring goes away */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7485 
7486 /**
7487  * cik_get_ih_wptr - get the IH ring buffer wptr
7488  *
7489  * @rdev: radeon_device pointer
7490  *
7491  * Get the IH ring buffer wptr from either the register
7492  * or the writeback memory buffer (CIK).  Also check for
7493  * ring buffer overflow and deal with it.
7494  * Used by cik_irq_process().
7495  * Returns the value of the wptr.
7496  */
7497 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7498 {
7499 	u32 wptr, tmp;
7500 
7501 	if (rdev->wb.enabled)
7502 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7503 	else
7504 		wptr = RREG32(IH_RB_WPTR);
7505 
7506 	if (wptr & RB_OVERFLOW) {
7507 		wptr &= ~RB_OVERFLOW;
7508 		/* When a ring buffer overflow happen start parsing interrupt
7509 		 * from the last not overwritten vector (wptr + 16). Hopefully
7510 		 * this should allow us to catchup.
7511 		 */
7512 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7513 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7514 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7515 		tmp = RREG32(IH_RB_CNTL);
7516 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7517 		WREG32(IH_RB_CNTL, tmp);
7518 	}
7519 	return (wptr & rdev->ih.ptr_mask);
7520 }
7521 
7522 /*        CIK IV Ring
7523  * Each IV ring entry is 128 bits:
7524  * [7:0]    - interrupt source id
7525  * [31:8]   - reserved
7526  * [59:32]  - interrupt source data
7527  * [63:60]  - reserved
7528  * [71:64]  - RINGID
7529  *            CP:
7530  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7531  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7532  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7533  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7534  *            PIPE_ID - ME0 0=3D
7535  *                    - ME1&2 compute dispatcher (4 pipes each)
7536  *            SDMA:
7537  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7538  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7539  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7540  * [79:72]  - VMID
7541  * [95:80]  - PASID
7542  * [127:96] - reserved
7543  */
7544 /**
7545  * cik_irq_process - interrupt handler
7546  *
7547  * @rdev: radeon_device pointer
7548  *
7549  * Interrupt hander (CIK).  Walk the IH ring,
7550  * ack interrupts and schedule work to handle
7551  * interrupt events.
7552  * Returns irq process return code.
7553  */
7554 int cik_irq_process(struct radeon_device *rdev)
7555 {
7556 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7557 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7558 	u32 wptr;
7559 	u32 rptr;
7560 	u32 src_id, src_data, ring_id;
7561 	u8 me_id, pipe_id, queue_id;
7562 	u32 ring_index;
7563 	bool queue_hotplug = false;
7564 	bool queue_dp = false;
7565 	bool queue_reset = false;
7566 	u32 addr, status, mc_client;
7567 	bool queue_thermal = false;
7568 
7569 	if (!rdev->ih.enabled || rdev->shutdown)
7570 		return IRQ_NONE;
7571 
7572 	wptr = cik_get_ih_wptr(rdev);
7573 
7574 restart_ih:
7575 	/* is somebody else already processing irqs? */
7576 	if (atomic_xchg(&rdev->ih.lock, 1))
7577 		return IRQ_NONE;
7578 
7579 	rptr = rdev->ih.rptr;
7580 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7581 
7582 	/* Order reading of wptr vs. reading of IH ring data */
7583 	rmb();
7584 
7585 	/* display interrupts */
7586 	cik_irq_ack(rdev);
7587 
7588 	while (rptr != wptr) {
7589 		/* wptr/rptr are in bytes! */
7590 		ring_index = rptr / 4;
7591 
7592 		radeon_kfd_interrupt(rdev,
7593 				(const void *) &rdev->ih.ring[ring_index]);
7594 
7595 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7596 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7597 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7598 
7599 		switch (src_id) {
7600 		case 1: /* D1 vblank/vline */
7601 			switch (src_data) {
7602 			case 0: /* D1 vblank */
7603 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7604 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7605 
7606 				if (rdev->irq.crtc_vblank_int[0]) {
7607 					drm_handle_vblank(rdev->ddev, 0);
7608 					rdev->pm.vblank_sync = true;
7609 					wake_up(&rdev->irq.vblank_queue);
7610 				}
7611 				if (atomic_read(&rdev->irq.pflip[0]))
7612 					radeon_crtc_handle_vblank(rdev, 0);
7613 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7614 				DRM_DEBUG("IH: D1 vblank\n");
7615 
7616 				break;
7617 			case 1: /* D1 vline */
7618 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7619 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7620 
7621 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7622 				DRM_DEBUG("IH: D1 vline\n");
7623 
7624 				break;
7625 			default:
7626 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7627 				break;
7628 			}
7629 			break;
7630 		case 2: /* D2 vblank/vline */
7631 			switch (src_data) {
7632 			case 0: /* D2 vblank */
7633 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7634 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7635 
7636 				if (rdev->irq.crtc_vblank_int[1]) {
7637 					drm_handle_vblank(rdev->ddev, 1);
7638 					rdev->pm.vblank_sync = true;
7639 					wake_up(&rdev->irq.vblank_queue);
7640 				}
7641 				if (atomic_read(&rdev->irq.pflip[1]))
7642 					radeon_crtc_handle_vblank(rdev, 1);
7643 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7644 				DRM_DEBUG("IH: D2 vblank\n");
7645 
7646 				break;
7647 			case 1: /* D2 vline */
7648 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7649 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7650 
7651 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7652 				DRM_DEBUG("IH: D2 vline\n");
7653 
7654 				break;
7655 			default:
7656 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7657 				break;
7658 			}
7659 			break;
7660 		case 3: /* D3 vblank/vline */
7661 			switch (src_data) {
7662 			case 0: /* D3 vblank */
7663 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7664 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7665 
7666 				if (rdev->irq.crtc_vblank_int[2]) {
7667 					drm_handle_vblank(rdev->ddev, 2);
7668 					rdev->pm.vblank_sync = true;
7669 					wake_up(&rdev->irq.vblank_queue);
7670 				}
7671 				if (atomic_read(&rdev->irq.pflip[2]))
7672 					radeon_crtc_handle_vblank(rdev, 2);
7673 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7674 				DRM_DEBUG("IH: D3 vblank\n");
7675 
7676 				break;
7677 			case 1: /* D3 vline */
7678 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7679 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7680 
7681 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7682 				DRM_DEBUG("IH: D3 vline\n");
7683 
7684 				break;
7685 			default:
7686 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7687 				break;
7688 			}
7689 			break;
7690 		case 4: /* D4 vblank/vline */
7691 			switch (src_data) {
7692 			case 0: /* D4 vblank */
7693 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7694 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7695 
7696 				if (rdev->irq.crtc_vblank_int[3]) {
7697 					drm_handle_vblank(rdev->ddev, 3);
7698 					rdev->pm.vblank_sync = true;
7699 					wake_up(&rdev->irq.vblank_queue);
7700 				}
7701 				if (atomic_read(&rdev->irq.pflip[3]))
7702 					radeon_crtc_handle_vblank(rdev, 3);
7703 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7704 				DRM_DEBUG("IH: D4 vblank\n");
7705 
7706 				break;
7707 			case 1: /* D4 vline */
7708 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7709 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7710 
7711 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7712 				DRM_DEBUG("IH: D4 vline\n");
7713 
7714 				break;
7715 			default:
7716 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7717 				break;
7718 			}
7719 			break;
7720 		case 5: /* D5 vblank/vline */
7721 			switch (src_data) {
7722 			case 0: /* D5 vblank */
7723 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7724 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7725 
7726 				if (rdev->irq.crtc_vblank_int[4]) {
7727 					drm_handle_vblank(rdev->ddev, 4);
7728 					rdev->pm.vblank_sync = true;
7729 					wake_up(&rdev->irq.vblank_queue);
7730 				}
7731 				if (atomic_read(&rdev->irq.pflip[4]))
7732 					radeon_crtc_handle_vblank(rdev, 4);
7733 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7734 				DRM_DEBUG("IH: D5 vblank\n");
7735 
7736 				break;
7737 			case 1: /* D5 vline */
7738 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7739 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7740 
7741 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7742 				DRM_DEBUG("IH: D5 vline\n");
7743 
7744 				break;
7745 			default:
7746 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7747 				break;
7748 			}
7749 			break;
7750 		case 6: /* D6 vblank/vline */
7751 			switch (src_data) {
7752 			case 0: /* D6 vblank */
7753 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7754 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7755 
7756 				if (rdev->irq.crtc_vblank_int[5]) {
7757 					drm_handle_vblank(rdev->ddev, 5);
7758 					rdev->pm.vblank_sync = true;
7759 					wake_up(&rdev->irq.vblank_queue);
7760 				}
7761 				if (atomic_read(&rdev->irq.pflip[5]))
7762 					radeon_crtc_handle_vblank(rdev, 5);
7763 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7764 				DRM_DEBUG("IH: D6 vblank\n");
7765 
7766 				break;
7767 			case 1: /* D6 vline */
7768 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7769 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7770 
7771 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7772 				DRM_DEBUG("IH: D6 vline\n");
7773 
7774 				break;
7775 			default:
7776 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7777 				break;
7778 			}
7779 			break;
7780 		case 8: /* D1 page flip */
7781 		case 10: /* D2 page flip */
7782 		case 12: /* D3 page flip */
7783 		case 14: /* D4 page flip */
7784 		case 16: /* D5 page flip */
7785 		case 18: /* D6 page flip */
7786 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7787 			if (radeon_use_pflipirq > 0)
7788 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7789 			break;
7790 		case 42: /* HPD hotplug */
7791 			switch (src_data) {
7792 			case 0:
7793 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7794 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7795 
7796 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7797 				queue_hotplug = true;
7798 				DRM_DEBUG("IH: HPD1\n");
7799 
7800 				break;
7801 			case 1:
7802 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7803 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7804 
7805 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7806 				queue_hotplug = true;
7807 				DRM_DEBUG("IH: HPD2\n");
7808 
7809 				break;
7810 			case 2:
7811 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7812 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7813 
7814 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7815 				queue_hotplug = true;
7816 				DRM_DEBUG("IH: HPD3\n");
7817 
7818 				break;
7819 			case 3:
7820 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7821 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7822 
7823 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7824 				queue_hotplug = true;
7825 				DRM_DEBUG("IH: HPD4\n");
7826 
7827 				break;
7828 			case 4:
7829 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7830 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7831 
7832 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7833 				queue_hotplug = true;
7834 				DRM_DEBUG("IH: HPD5\n");
7835 
7836 				break;
7837 			case 5:
7838 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7839 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7840 
7841 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7842 				queue_hotplug = true;
7843 				DRM_DEBUG("IH: HPD6\n");
7844 
7845 				break;
7846 			case 6:
7847 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7848 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7849 
7850 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7851 				queue_dp = true;
7852 				DRM_DEBUG("IH: HPD_RX 1\n");
7853 
7854 				break;
7855 			case 7:
7856 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7857 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7858 
7859 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7860 				queue_dp = true;
7861 				DRM_DEBUG("IH: HPD_RX 2\n");
7862 
7863 				break;
7864 			case 8:
7865 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7866 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7867 
7868 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7869 				queue_dp = true;
7870 				DRM_DEBUG("IH: HPD_RX 3\n");
7871 
7872 				break;
7873 			case 9:
7874 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7875 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7876 
7877 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7878 				queue_dp = true;
7879 				DRM_DEBUG("IH: HPD_RX 4\n");
7880 
7881 				break;
7882 			case 10:
7883 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7884 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7885 
7886 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7887 				queue_dp = true;
7888 				DRM_DEBUG("IH: HPD_RX 5\n");
7889 
7890 				break;
7891 			case 11:
7892 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7893 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7894 
7895 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7896 				queue_dp = true;
7897 				DRM_DEBUG("IH: HPD_RX 6\n");
7898 
7899 				break;
7900 			default:
7901 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7902 				break;
7903 			}
7904 			break;
7905 		case 96:
7906 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7907 			WREG32(SRBM_INT_ACK, 0x1);
7908 			break;
7909 		case 124: /* UVD */
7910 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7911 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7912 			break;
7913 		case 146:
7914 		case 147:
7915 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7916 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7917 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7918 			/* reset addr and status */
7919 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7920 			if (addr == 0x0 && status == 0x0)
7921 				break;
7922 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7923 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7924 				addr);
7925 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7926 				status);
7927 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7928 			break;
7929 		case 167: /* VCE */
7930 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7931 			switch (src_data) {
7932 			case 0:
7933 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7934 				break;
7935 			case 1:
7936 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7937 				break;
7938 			default:
7939 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7940 				break;
7941 			}
7942 			break;
7943 		case 176: /* GFX RB CP_INT */
7944 		case 177: /* GFX IB CP_INT */
7945 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7946 			break;
7947 		case 181: /* CP EOP event */
7948 			DRM_DEBUG("IH: CP EOP\n");
7949 			/* XXX check the bitfield order! */
7950 			me_id = (ring_id & 0x60) >> 5;
7951 			pipe_id = (ring_id & 0x18) >> 3;
7952 			queue_id = (ring_id & 0x7) >> 0;
7953 			switch (me_id) {
7954 			case 0:
7955 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7956 				break;
7957 			case 1:
7958 			case 2:
7959 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7960 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7961 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7962 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7963 				break;
7964 			}
7965 			break;
7966 		case 184: /* CP Privileged reg access */
7967 			DRM_ERROR("Illegal register access in command stream\n");
7968 			/* XXX check the bitfield order! */
7969 			me_id = (ring_id & 0x60) >> 5;
7970 			pipe_id = (ring_id & 0x18) >> 3;
7971 			queue_id = (ring_id & 0x7) >> 0;
7972 			switch (me_id) {
7973 			case 0:
7974 				/* This results in a full GPU reset, but all we need to do is soft
7975 				 * reset the CP for gfx
7976 				 */
7977 				queue_reset = true;
7978 				break;
7979 			case 1:
7980 				/* XXX compute */
7981 				queue_reset = true;
7982 				break;
7983 			case 2:
7984 				/* XXX compute */
7985 				queue_reset = true;
7986 				break;
7987 			}
7988 			break;
7989 		case 185: /* CP Privileged inst */
7990 			DRM_ERROR("Illegal instruction in command stream\n");
7991 			/* XXX check the bitfield order! */
7992 			me_id = (ring_id & 0x60) >> 5;
7993 			pipe_id = (ring_id & 0x18) >> 3;
7994 			queue_id = (ring_id & 0x7) >> 0;
7995 			switch (me_id) {
7996 			case 0:
7997 				/* This results in a full GPU reset, but all we need to do is soft
7998 				 * reset the CP for gfx
7999 				 */
8000 				queue_reset = true;
8001 				break;
8002 			case 1:
8003 				/* XXX compute */
8004 				queue_reset = true;
8005 				break;
8006 			case 2:
8007 				/* XXX compute */
8008 				queue_reset = true;
8009 				break;
8010 			}
8011 			break;
8012 		case 224: /* SDMA trap event */
8013 			/* XXX check the bitfield order! */
8014 			me_id = (ring_id & 0x3) >> 0;
8015 			queue_id = (ring_id & 0xc) >> 2;
8016 			DRM_DEBUG("IH: SDMA trap\n");
8017 			switch (me_id) {
8018 			case 0:
8019 				switch (queue_id) {
8020 				case 0:
8021 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8022 					break;
8023 				case 1:
8024 					/* XXX compute */
8025 					break;
8026 				case 2:
8027 					/* XXX compute */
8028 					break;
8029 				}
8030 				break;
8031 			case 1:
8032 				switch (queue_id) {
8033 				case 0:
8034 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8035 					break;
8036 				case 1:
8037 					/* XXX compute */
8038 					break;
8039 				case 2:
8040 					/* XXX compute */
8041 					break;
8042 				}
8043 				break;
8044 			}
8045 			break;
8046 		case 230: /* thermal low to high */
8047 			DRM_DEBUG("IH: thermal low to high\n");
8048 			rdev->pm.dpm.thermal.high_to_low = false;
8049 			queue_thermal = true;
8050 			break;
8051 		case 231: /* thermal high to low */
8052 			DRM_DEBUG("IH: thermal high to low\n");
8053 			rdev->pm.dpm.thermal.high_to_low = true;
8054 			queue_thermal = true;
8055 			break;
8056 		case 233: /* GUI IDLE */
8057 			DRM_DEBUG("IH: GUI idle\n");
8058 			break;
8059 		case 241: /* SDMA Privileged inst */
8060 		case 247: /* SDMA Privileged inst */
8061 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8062 			/* XXX check the bitfield order! */
8063 			me_id = (ring_id & 0x3) >> 0;
8064 			queue_id = (ring_id & 0xc) >> 2;
8065 			switch (me_id) {
8066 			case 0:
8067 				switch (queue_id) {
8068 				case 0:
8069 					queue_reset = true;
8070 					break;
8071 				case 1:
8072 					/* XXX compute */
8073 					queue_reset = true;
8074 					break;
8075 				case 2:
8076 					/* XXX compute */
8077 					queue_reset = true;
8078 					break;
8079 				}
8080 				break;
8081 			case 1:
8082 				switch (queue_id) {
8083 				case 0:
8084 					queue_reset = true;
8085 					break;
8086 				case 1:
8087 					/* XXX compute */
8088 					queue_reset = true;
8089 					break;
8090 				case 2:
8091 					/* XXX compute */
8092 					queue_reset = true;
8093 					break;
8094 				}
8095 				break;
8096 			}
8097 			break;
8098 		default:
8099 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8100 			break;
8101 		}
8102 
8103 		/* wptr/rptr are in bytes! */
8104 		rptr += 16;
8105 		rptr &= rdev->ih.ptr_mask;
8106 		WREG32(IH_RB_RPTR, rptr);
8107 	}
8108 	if (queue_dp)
8109 		schedule_work(&rdev->dp_work);
8110 	if (queue_hotplug)
8111 		schedule_delayed_work(&rdev->hotplug_work, 0);
8112 	if (queue_reset) {
8113 		rdev->needs_reset = true;
8114 		wake_up_all(&rdev->fence_queue);
8115 	}
8116 	if (queue_thermal)
8117 		schedule_work(&rdev->pm.dpm.thermal.work);
8118 	rdev->ih.rptr = rptr;
8119 	atomic_set(&rdev->ih.lock, 0);
8120 
8121 	/* make sure wptr hasn't changed while processing */
8122 	wptr = cik_get_ih_wptr(rdev);
8123 	if (wptr != rptr)
8124 		goto restart_ih;
8125 
8126 	return IRQ_HANDLED;
8127 }
8128 
8129 /*
8130  * startup/shutdown callbacks
8131  */
8132 static void cik_uvd_init(struct radeon_device *rdev)
8133 {
8134 	int r;
8135 
8136 	if (!rdev->has_uvd)
8137 		return;
8138 
8139 	r = radeon_uvd_init(rdev);
8140 	if (r) {
8141 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8142 		/*
8143 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8144 		 * to early fails cik_uvd_start() and thus nothing happens
8145 		 * there. So it is pointless to try to go through that code
8146 		 * hence why we disable uvd here.
8147 		 */
8148 		rdev->has_uvd = 0;
8149 		return;
8150 	}
8151 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8152 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8153 }
8154 
8155 static void cik_uvd_start(struct radeon_device *rdev)
8156 {
8157 	int r;
8158 
8159 	if (!rdev->has_uvd)
8160 		return;
8161 
8162 	r = radeon_uvd_resume(rdev);
8163 	if (r) {
8164 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8165 		goto error;
8166 	}
8167 	r = uvd_v4_2_resume(rdev);
8168 	if (r) {
8169 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8170 		goto error;
8171 	}
8172 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8173 	if (r) {
8174 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8175 		goto error;
8176 	}
8177 	return;
8178 
8179 error:
8180 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8181 }
8182 
8183 static void cik_uvd_resume(struct radeon_device *rdev)
8184 {
8185 	struct radeon_ring *ring;
8186 	int r;
8187 
8188 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8189 		return;
8190 
8191 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8192 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8193 	if (r) {
8194 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8195 		return;
8196 	}
8197 	r = uvd_v1_0_init(rdev);
8198 	if (r) {
8199 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8200 		return;
8201 	}
8202 }
8203 
8204 static void cik_vce_init(struct radeon_device *rdev)
8205 {
8206 	int r;
8207 
8208 	if (!rdev->has_vce)
8209 		return;
8210 
8211 	r = radeon_vce_init(rdev);
8212 	if (r) {
8213 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8214 		/*
8215 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8216 		 * to early fails cik_vce_start() and thus nothing happens
8217 		 * there. So it is pointless to try to go through that code
8218 		 * hence why we disable vce here.
8219 		 */
8220 		rdev->has_vce = 0;
8221 		return;
8222 	}
8223 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8224 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8225 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8226 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8227 }
8228 
8229 static void cik_vce_start(struct radeon_device *rdev)
8230 {
8231 	int r;
8232 
8233 	if (!rdev->has_vce)
8234 		return;
8235 
8236 	r = radeon_vce_resume(rdev);
8237 	if (r) {
8238 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8239 		goto error;
8240 	}
8241 	r = vce_v2_0_resume(rdev);
8242 	if (r) {
8243 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8244 		goto error;
8245 	}
8246 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8247 	if (r) {
8248 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8249 		goto error;
8250 	}
8251 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8252 	if (r) {
8253 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8254 		goto error;
8255 	}
8256 	return;
8257 
8258 error:
8259 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8260 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8261 }
8262 
8263 static void cik_vce_resume(struct radeon_device *rdev)
8264 {
8265 	struct radeon_ring *ring;
8266 	int r;
8267 
8268 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8269 		return;
8270 
8271 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8272 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8273 	if (r) {
8274 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8275 		return;
8276 	}
8277 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8278 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8279 	if (r) {
8280 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8281 		return;
8282 	}
8283 	r = vce_v1_0_init(rdev);
8284 	if (r) {
8285 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8286 		return;
8287 	}
8288 }
8289 
8290 /**
8291  * cik_startup - program the asic to a functional state
8292  *
8293  * @rdev: radeon_device pointer
8294  *
8295  * Programs the asic to a functional state (CIK).
8296  * Called by cik_init() and cik_resume().
8297  * Returns 0 for success, error for failure.
8298  */
8299 static int cik_startup(struct radeon_device *rdev)
8300 {
8301 	struct radeon_ring *ring;
8302 	u32 nop;
8303 	int r;
8304 
8305 	/* enable pcie gen2/3 link */
8306 	cik_pcie_gen3_enable(rdev);
8307 	/* enable aspm */
8308 	cik_program_aspm(rdev);
8309 
8310 	/* scratch needs to be initialized before MC */
8311 	r = r600_vram_scratch_init(rdev);
8312 	if (r)
8313 		return r;
8314 
8315 	cik_mc_program(rdev);
8316 
8317 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8318 		r = ci_mc_load_microcode(rdev);
8319 		if (r) {
8320 			DRM_ERROR("Failed to load MC firmware!\n");
8321 			return r;
8322 		}
8323 	}
8324 
8325 	r = cik_pcie_gart_enable(rdev);
8326 	if (r)
8327 		return r;
8328 	cik_gpu_init(rdev);
8329 
8330 	/* allocate rlc buffers */
8331 	if (rdev->flags & RADEON_IS_IGP) {
8332 		if (rdev->family == CHIP_KAVERI) {
8333 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8334 			rdev->rlc.reg_list_size =
8335 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8336 		} else {
8337 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8338 			rdev->rlc.reg_list_size =
8339 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8340 		}
8341 	}
8342 	rdev->rlc.cs_data = ci_cs_data;
8343 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8344 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8345 	r = sumo_rlc_init(rdev);
8346 	if (r) {
8347 		DRM_ERROR("Failed to init rlc BOs!\n");
8348 		return r;
8349 	}
8350 
8351 	/* allocate wb buffer */
8352 	r = radeon_wb_init(rdev);
8353 	if (r)
8354 		return r;
8355 
8356 	/* allocate mec buffers */
8357 	r = cik_mec_init(rdev);
8358 	if (r) {
8359 		DRM_ERROR("Failed to init MEC BOs!\n");
8360 		return r;
8361 	}
8362 
8363 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8364 	if (r) {
8365 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8366 		return r;
8367 	}
8368 
8369 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8370 	if (r) {
8371 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8372 		return r;
8373 	}
8374 
8375 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8376 	if (r) {
8377 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8378 		return r;
8379 	}
8380 
8381 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8382 	if (r) {
8383 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8384 		return r;
8385 	}
8386 
8387 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8388 	if (r) {
8389 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8390 		return r;
8391 	}
8392 
8393 	cik_uvd_start(rdev);
8394 	cik_vce_start(rdev);
8395 
8396 	/* Enable IRQ */
8397 	if (!rdev->irq.installed) {
8398 		r = radeon_irq_kms_init(rdev);
8399 		if (r)
8400 			return r;
8401 	}
8402 
8403 	r = cik_irq_init(rdev);
8404 	if (r) {
8405 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8406 		radeon_irq_kms_fini(rdev);
8407 		return r;
8408 	}
8409 	cik_irq_set(rdev);
8410 
8411 	if (rdev->family == CHIP_HAWAII) {
8412 		if (rdev->new_fw)
8413 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8414 		else
8415 			nop = RADEON_CP_PACKET2;
8416 	} else {
8417 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8418 	}
8419 
8420 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8421 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8422 			     nop);
8423 	if (r)
8424 		return r;
8425 
8426 	/* set up the compute queues */
8427 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8428 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8429 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8430 			     nop);
8431 	if (r)
8432 		return r;
8433 	ring->me = 1; /* first MEC */
8434 	ring->pipe = 0; /* first pipe */
8435 	ring->queue = 0; /* first queue */
8436 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8437 
8438 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8439 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8440 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8441 			     nop);
8442 	if (r)
8443 		return r;
8444 	/* dGPU only have 1 MEC */
8445 	ring->me = 1; /* first MEC */
8446 	ring->pipe = 0; /* first pipe */
8447 	ring->queue = 1; /* second queue */
8448 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8449 
8450 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8451 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8452 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8453 	if (r)
8454 		return r;
8455 
8456 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8457 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8458 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8459 	if (r)
8460 		return r;
8461 
8462 	r = cik_cp_resume(rdev);
8463 	if (r)
8464 		return r;
8465 
8466 	r = cik_sdma_resume(rdev);
8467 	if (r)
8468 		return r;
8469 
8470 	cik_uvd_resume(rdev);
8471 	cik_vce_resume(rdev);
8472 
8473 	r = radeon_ib_pool_init(rdev);
8474 	if (r) {
8475 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8476 		return r;
8477 	}
8478 
8479 	r = radeon_vm_manager_init(rdev);
8480 	if (r) {
8481 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8482 		return r;
8483 	}
8484 
8485 	r = radeon_audio_init(rdev);
8486 	if (r)
8487 		return r;
8488 
8489 	r = radeon_kfd_resume(rdev);
8490 	if (r)
8491 		return r;
8492 
8493 	return 0;
8494 }
8495 
8496 /**
8497  * cik_resume - resume the asic to a functional state
8498  *
8499  * @rdev: radeon_device pointer
8500  *
8501  * Programs the asic to a functional state (CIK).
8502  * Called at resume.
8503  * Returns 0 for success, error for failure.
8504  */
8505 int cik_resume(struct radeon_device *rdev)
8506 {
8507 	int r;
8508 
8509 	/* post card */
8510 	atom_asic_init(rdev->mode_info.atom_context);
8511 
8512 	/* init golden registers */
8513 	cik_init_golden_registers(rdev);
8514 
8515 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8516 		radeon_pm_resume(rdev);
8517 
8518 	rdev->accel_working = true;
8519 	r = cik_startup(rdev);
8520 	if (r) {
8521 		DRM_ERROR("cik startup failed on resume\n");
8522 		rdev->accel_working = false;
8523 		return r;
8524 	}
8525 
8526 	return r;
8527 
8528 }
8529 
8530 /**
8531  * cik_suspend - suspend the asic
8532  *
8533  * @rdev: radeon_device pointer
8534  *
8535  * Bring the chip into a state suitable for suspend (CIK).
8536  * Called at suspend.
8537  * Returns 0 for success.
8538  */
8539 int cik_suspend(struct radeon_device *rdev)
8540 {
8541 	radeon_kfd_suspend(rdev);
8542 	radeon_pm_suspend(rdev);
8543 	radeon_audio_fini(rdev);
8544 	radeon_vm_manager_fini(rdev);
8545 	cik_cp_enable(rdev, false);
8546 	cik_sdma_enable(rdev, false);
8547 	if (rdev->has_uvd) {
8548 		uvd_v1_0_fini(rdev);
8549 		radeon_uvd_suspend(rdev);
8550 	}
8551 	if (rdev->has_vce)
8552 		radeon_vce_suspend(rdev);
8553 	cik_fini_pg(rdev);
8554 	cik_fini_cg(rdev);
8555 	cik_irq_suspend(rdev);
8556 	radeon_wb_disable(rdev);
8557 	cik_pcie_gart_disable(rdev);
8558 	return 0;
8559 }
8560 
8561 /* Plan is to move initialization in that function and use
8562  * helper function so that radeon_device_init pretty much
8563  * do nothing more than calling asic specific function. This
8564  * should also allow to remove a bunch of callback function
8565  * like vram_info.
8566  */
8567 /**
8568  * cik_init - asic specific driver and hw init
8569  *
8570  * @rdev: radeon_device pointer
8571  *
8572  * Setup asic specific driver variables and program the hw
8573  * to a functional state (CIK).
8574  * Called at driver startup.
8575  * Returns 0 for success, errors for failure.
8576  */
8577 int cik_init(struct radeon_device *rdev)
8578 {
8579 	struct radeon_ring *ring;
8580 	int r;
8581 
8582 	/* Read BIOS */
8583 	if (!radeon_get_bios(rdev)) {
8584 		if (ASIC_IS_AVIVO(rdev))
8585 			return -EINVAL;
8586 	}
8587 	/* Must be an ATOMBIOS */
8588 	if (!rdev->is_atom_bios) {
8589 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8590 		return -EINVAL;
8591 	}
8592 	r = radeon_atombios_init(rdev);
8593 	if (r)
8594 		return r;
8595 
8596 	/* Post card if necessary */
8597 	if (!radeon_card_posted(rdev)) {
8598 		if (!rdev->bios) {
8599 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8600 			return -EINVAL;
8601 		}
8602 		DRM_INFO("GPU not posted. posting now...\n");
8603 		atom_asic_init(rdev->mode_info.atom_context);
8604 	}
8605 	/* init golden registers */
8606 	cik_init_golden_registers(rdev);
8607 	/* Initialize scratch registers */
8608 	cik_scratch_init(rdev);
8609 	/* Initialize surface registers */
8610 	radeon_surface_init(rdev);
8611 	/* Initialize clocks */
8612 	radeon_get_clock_info(rdev->ddev);
8613 
8614 	/* Fence driver */
8615 	r = radeon_fence_driver_init(rdev);
8616 	if (r)
8617 		return r;
8618 
8619 	/* initialize memory controller */
8620 	r = cik_mc_init(rdev);
8621 	if (r)
8622 		return r;
8623 	/* Memory manager */
8624 	r = radeon_bo_init(rdev);
8625 	if (r)
8626 		return r;
8627 
8628 	if (rdev->flags & RADEON_IS_IGP) {
8629 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8630 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8631 			r = cik_init_microcode(rdev);
8632 			if (r) {
8633 				DRM_ERROR("Failed to load firmware!\n");
8634 				return r;
8635 			}
8636 		}
8637 	} else {
8638 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8639 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8640 		    !rdev->mc_fw) {
8641 			r = cik_init_microcode(rdev);
8642 			if (r) {
8643 				DRM_ERROR("Failed to load firmware!\n");
8644 				return r;
8645 			}
8646 		}
8647 	}
8648 
8649 	/* Initialize power management */
8650 	radeon_pm_init(rdev);
8651 
8652 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8653 	ring->ring_obj = NULL;
8654 	r600_ring_init(rdev, ring, 1024 * 1024);
8655 
8656 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8657 	ring->ring_obj = NULL;
8658 	r600_ring_init(rdev, ring, 1024 * 1024);
8659 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8660 	if (r)
8661 		return r;
8662 
8663 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8664 	ring->ring_obj = NULL;
8665 	r600_ring_init(rdev, ring, 1024 * 1024);
8666 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8667 	if (r)
8668 		return r;
8669 
8670 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8671 	ring->ring_obj = NULL;
8672 	r600_ring_init(rdev, ring, 256 * 1024);
8673 
8674 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8675 	ring->ring_obj = NULL;
8676 	r600_ring_init(rdev, ring, 256 * 1024);
8677 
8678 	cik_uvd_init(rdev);
8679 	cik_vce_init(rdev);
8680 
8681 	rdev->ih.ring_obj = NULL;
8682 	r600_ih_ring_init(rdev, 64 * 1024);
8683 
8684 	r = r600_pcie_gart_init(rdev);
8685 	if (r)
8686 		return r;
8687 
8688 	rdev->accel_working = true;
8689 	r = cik_startup(rdev);
8690 	if (r) {
8691 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8692 		cik_cp_fini(rdev);
8693 		cik_sdma_fini(rdev);
8694 		cik_irq_fini(rdev);
8695 		sumo_rlc_fini(rdev);
8696 		cik_mec_fini(rdev);
8697 		radeon_wb_fini(rdev);
8698 		radeon_ib_pool_fini(rdev);
8699 		radeon_vm_manager_fini(rdev);
8700 		radeon_irq_kms_fini(rdev);
8701 		cik_pcie_gart_fini(rdev);
8702 		rdev->accel_working = false;
8703 	}
8704 
8705 	/* Don't start up if the MC ucode is missing.
8706 	 * The default clocks and voltages before the MC ucode
8707 	 * is loaded are not suffient for advanced operations.
8708 	 */
8709 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8710 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8711 		return -EINVAL;
8712 	}
8713 
8714 	return 0;
8715 }
8716 
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 *
 * The helpers below are invoked unconditionally and in a fixed order;
 * NOTE(review): the order presumably mirrors the init/startup
 * dependencies - confirm before reordering anything.
 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* engines: command processor and SDMA */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* power gating / clock gating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* multimedia blocks (no has_uvd/has_vce check is done here) */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* release the BIOS image and clear the now stale pointer */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8752 
8753 void dce8_program_fmt(struct drm_encoder *encoder)
8754 {
8755 	struct drm_device *dev = encoder->dev;
8756 	struct radeon_device *rdev = dev->dev_private;
8757 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8758 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8759 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8760 	int bpc = 0;
8761 	u32 tmp = 0;
8762 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8763 
8764 	if (connector) {
8765 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8766 		bpc = radeon_get_monitor_bpc(connector);
8767 		dither = radeon_connector->dither;
8768 	}
8769 
8770 	/* LVDS/eDP FMT is set up by atom */
8771 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8772 		return;
8773 
8774 	/* not needed for analog */
8775 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8776 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8777 		return;
8778 
8779 	if (bpc == 0)
8780 		return;
8781 
8782 	switch (bpc) {
8783 	case 6:
8784 		if (dither == RADEON_FMT_DITHER_ENABLE)
8785 			/* XXX sort out optimal dither settings */
8786 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8787 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8788 		else
8789 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8790 		break;
8791 	case 8:
8792 		if (dither == RADEON_FMT_DITHER_ENABLE)
8793 			/* XXX sort out optimal dither settings */
8794 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8795 				FMT_RGB_RANDOM_ENABLE |
8796 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8797 		else
8798 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8799 		break;
8800 	case 10:
8801 		if (dither == RADEON_FMT_DITHER_ENABLE)
8802 			/* XXX sort out optimal dither settings */
8803 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8804 				FMT_RGB_RANDOM_ENABLE |
8805 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8806 		else
8807 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8808 		break;
8809 	default:
8810 		/* not needed */
8811 		break;
8812 	}
8813 
8814 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8815 }
8816 
8817 /* display watermark setup */
8818 /**
8819  * dce8_line_buffer_adjust - Set up the line buffer
8820  *
8821  * @rdev: radeon_device pointer
8822  * @radeon_crtc: the selected display controller
8823  * @mode: the current display mode on the selected display
8824  * controller
8825  *
8826  * Setup up the line buffer allocation for
8827  * the selected display controller (CIK).
8828  * Returns the line buffer size in pixels.
8829  */
8830 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8831 				   struct radeon_crtc *radeon_crtc,
8832 				   struct drm_display_mode *mode)
8833 {
8834 	u32 tmp, buffer_alloc, i;
8835 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8836 	/*
8837 	 * Line Buffer Setup
8838 	 * There are 6 line buffers, one for each display controllers.
8839 	 * There are 3 partitions per LB. Select the number of partitions
8840 	 * to enable based on the display width.  For display widths larger
8841 	 * than 4096, you need use to use 2 display controllers and combine
8842 	 * them using the stereo blender.
8843 	 */
8844 	if (radeon_crtc->base.enabled && mode) {
8845 		if (mode->crtc_hdisplay < 1920) {
8846 			tmp = 1;
8847 			buffer_alloc = 2;
8848 		} else if (mode->crtc_hdisplay < 2560) {
8849 			tmp = 2;
8850 			buffer_alloc = 2;
8851 		} else if (mode->crtc_hdisplay < 4096) {
8852 			tmp = 0;
8853 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8854 		} else {
8855 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8856 			tmp = 0;
8857 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8858 		}
8859 	} else {
8860 		tmp = 1;
8861 		buffer_alloc = 0;
8862 	}
8863 
8864 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8865 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8866 
8867 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8868 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8869 	for (i = 0; i < rdev->usec_timeout; i++) {
8870 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8871 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8872 			break;
8873 		udelay(1);
8874 	}
8875 
8876 	if (radeon_crtc->base.enabled && mode) {
8877 		switch (tmp) {
8878 		case 0:
8879 		default:
8880 			return 4096 * 2;
8881 		case 1:
8882 			return 1920 * 2;
8883 		case 2:
8884 			return 2560 * 2;
8885 		}
8886 	}
8887 
8888 	/* controller not enabled, so no lb used */
8889 	return 0;
8890 }
8891 
8892 /**
8893  * cik_get_number_of_dram_channels - get the number of dram channels
8894  *
8895  * @rdev: radeon_device pointer
8896  *
8897  * Look up the number of video ram channels (CIK).
8898  * Used for display watermark bandwidth calculations
8899  * Returns the number of dram channels
8900  */
8901 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8902 {
8903 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8904 
8905 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8906 	case 0:
8907 	default:
8908 		return 1;
8909 	case 1:
8910 		return 2;
8911 	case 2:
8912 		return 4;
8913 	case 3:
8914 		return 8;
8915 	case 4:
8916 		return 3;
8917 	case 5:
8918 		return 6;
8919 	case 6:
8920 		return 10;
8921 	case 7:
8922 		return 12;
8923 	case 8:
8924 		return 16;
8925 	}
8926 }
8927 
/*
 * Input parameters for the DCE8 display watermark calculations.
 * One instance is filled in per clock level (high/low) by
 * dce8_program_watermarks() and handed to the dce8_* bandwidth and
 * latency helpers.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8943 
8944 /**
8945  * dce8_dram_bandwidth - get the dram bandwidth
8946  *
8947  * @wm: watermark calculation data
8948  *
8949  * Calculate the raw dram bandwidth (CIK).
8950  * Used for display watermark bandwidth calculations
8951  * Returns the dram bandwidth in MBytes/s
8952  */
8953 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8954 {
8955 	/* Calculate raw DRAM Bandwidth */
8956 	fixed20_12 dram_efficiency; /* 0.7 */
8957 	fixed20_12 yclk, dram_channels, bandwidth;
8958 	fixed20_12 a;
8959 
8960 	a.full = dfixed_const(1000);
8961 	yclk.full = dfixed_const(wm->yclk);
8962 	yclk.full = dfixed_div(yclk, a);
8963 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8964 	a.full = dfixed_const(10);
8965 	dram_efficiency.full = dfixed_const(7);
8966 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8967 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8968 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8969 
8970 	return dfixed_trunc(bandwidth);
8971 }
8972 
8973 /**
8974  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8975  *
8976  * @wm: watermark calculation data
8977  *
8978  * Calculate the dram bandwidth used for display (CIK).
8979  * Used for display watermark bandwidth calculations
8980  * Returns the dram bandwidth for display in MBytes/s
8981  */
8982 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8983 {
8984 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8985 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8986 	fixed20_12 yclk, dram_channels, bandwidth;
8987 	fixed20_12 a;
8988 
8989 	a.full = dfixed_const(1000);
8990 	yclk.full = dfixed_const(wm->yclk);
8991 	yclk.full = dfixed_div(yclk, a);
8992 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8993 	a.full = dfixed_const(10);
8994 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8995 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8996 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8997 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8998 
8999 	return dfixed_trunc(bandwidth);
9000 }
9001 
9002 /**
9003  * dce8_data_return_bandwidth - get the data return bandwidth
9004  *
9005  * @wm: watermark calculation data
9006  *
9007  * Calculate the data return bandwidth used for display (CIK).
9008  * Used for display watermark bandwidth calculations
9009  * Returns the data return bandwidth in MBytes/s
9010  */
9011 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9012 {
9013 	/* Calculate the display Data return Bandwidth */
9014 	fixed20_12 return_efficiency; /* 0.8 */
9015 	fixed20_12 sclk, bandwidth;
9016 	fixed20_12 a;
9017 
9018 	a.full = dfixed_const(1000);
9019 	sclk.full = dfixed_const(wm->sclk);
9020 	sclk.full = dfixed_div(sclk, a);
9021 	a.full = dfixed_const(10);
9022 	return_efficiency.full = dfixed_const(8);
9023 	return_efficiency.full = dfixed_div(return_efficiency, a);
9024 	a.full = dfixed_const(32);
9025 	bandwidth.full = dfixed_mul(a, sclk);
9026 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9027 
9028 	return dfixed_trunc(bandwidth);
9029 }
9030 
9031 /**
9032  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9033  *
9034  * @wm: watermark calculation data
9035  *
9036  * Calculate the dmif bandwidth used for display (CIK).
9037  * Used for display watermark bandwidth calculations
9038  * Returns the dmif bandwidth in MBytes/s
9039  */
9040 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9041 {
9042 	/* Calculate the DMIF Request Bandwidth */
9043 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9044 	fixed20_12 disp_clk, bandwidth;
9045 	fixed20_12 a, b;
9046 
9047 	a.full = dfixed_const(1000);
9048 	disp_clk.full = dfixed_const(wm->disp_clk);
9049 	disp_clk.full = dfixed_div(disp_clk, a);
9050 	a.full = dfixed_const(32);
9051 	b.full = dfixed_mul(a, disp_clk);
9052 
9053 	a.full = dfixed_const(10);
9054 	disp_clk_request_efficiency.full = dfixed_const(8);
9055 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9056 
9057 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9058 
9059 	return dfixed_trunc(bandwidth);
9060 }
9061 
9062 /**
9063  * dce8_available_bandwidth - get the min available bandwidth
9064  *
9065  * @wm: watermark calculation data
9066  *
9067  * Calculate the min available bandwidth used for display (CIK).
9068  * Used for display watermark bandwidth calculations
9069  * Returns the min available bandwidth in MBytes/s
9070  */
9071 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9072 {
9073 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9074 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9075 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9076 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9077 
9078 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9079 }
9080 
9081 /**
9082  * dce8_average_bandwidth - get the average available bandwidth
9083  *
9084  * @wm: watermark calculation data
9085  *
9086  * Calculate the average available bandwidth used for display (CIK).
9087  * Used for display watermark bandwidth calculations
9088  * Returns the average available bandwidth in MBytes/s
9089  */
9090 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9091 {
9092 	/* Calculate the display mode Average Bandwidth
9093 	 * DisplayMode should contain the source and destination dimensions,
9094 	 * timing, etc.
9095 	 */
9096 	fixed20_12 bpp;
9097 	fixed20_12 line_time;
9098 	fixed20_12 src_width;
9099 	fixed20_12 bandwidth;
9100 	fixed20_12 a;
9101 
9102 	a.full = dfixed_const(1000);
9103 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9104 	line_time.full = dfixed_div(line_time, a);
9105 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9106 	src_width.full = dfixed_const(wm->src_width);
9107 	bandwidth.full = dfixed_mul(src_width, bpp);
9108 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9109 	bandwidth.full = dfixed_div(bandwidth, line_time);
9110 
9111 	return dfixed_trunc(bandwidth);
9112 }
9113 
9114 /**
9115  * dce8_latency_watermark - get the latency watermark
9116  *
9117  * @wm: watermark calculation data
9118  *
9119  * Calculate the latency watermark (CIK).
9120  * Used for display watermark bandwidth calculations
9121  * Returns the latency watermark in ns
9122  */
9123 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9124 {
9125 	/* First calculate the latency in ns */
9126 	u32 mc_latency = 2000; /* 2000 ns. */
9127 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9128 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9129 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9130 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9131 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9132 		(wm->num_heads * cursor_line_pair_return_time);
9133 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9134 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9135 	u32 tmp, dmif_size = 12288;
9136 	fixed20_12 a, b, c;
9137 
9138 	if (wm->num_heads == 0)
9139 		return 0;
9140 
9141 	a.full = dfixed_const(2);
9142 	b.full = dfixed_const(1);
9143 	if ((wm->vsc.full > a.full) ||
9144 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9145 	    (wm->vtaps >= 5) ||
9146 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9147 		max_src_lines_per_dst_line = 4;
9148 	else
9149 		max_src_lines_per_dst_line = 2;
9150 
9151 	a.full = dfixed_const(available_bandwidth);
9152 	b.full = dfixed_const(wm->num_heads);
9153 	a.full = dfixed_div(a, b);
9154 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9155 	tmp = min(dfixed_trunc(a), tmp);
9156 
9157 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9158 
9159 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9160 	b.full = dfixed_const(1000);
9161 	c.full = dfixed_const(lb_fill_bw);
9162 	b.full = dfixed_div(c, b);
9163 	a.full = dfixed_div(a, b);
9164 	line_fill_time = dfixed_trunc(a);
9165 
9166 	if (line_fill_time < wm->active_time)
9167 		return latency;
9168 	else
9169 		return latency + (line_fill_time - wm->active_time);
9170 
9171 }
9172 
9173 /**
9174  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9175  * average and available dram bandwidth
9176  *
9177  * @wm: watermark calculation data
9178  *
9179  * Check if the display average bandwidth fits in the display
9180  * dram bandwidth (CIK).
9181  * Used for display watermark bandwidth calculations
9182  * Returns true if the display fits, false if not.
9183  */
9184 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9185 {
9186 	if (dce8_average_bandwidth(wm) <=
9187 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9188 		return true;
9189 	else
9190 		return false;
9191 }
9192 
9193 /**
9194  * dce8_average_bandwidth_vs_available_bandwidth - check
9195  * average and available bandwidth
9196  *
9197  * @wm: watermark calculation data
9198  *
9199  * Check if the display average bandwidth fits in the display
9200  * available bandwidth (CIK).
9201  * Used for display watermark bandwidth calculations
9202  * Returns true if the display fits, false if not.
9203  */
9204 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9205 {
9206 	if (dce8_average_bandwidth(wm) <=
9207 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9208 		return true;
9209 	else
9210 		return false;
9211 }
9212 
9213 /**
9214  * dce8_check_latency_hiding - check latency hiding
9215  *
9216  * @wm: watermark calculation data
9217  *
9218  * Check latency hiding (CIK).
9219  * Used for display watermark bandwidth calculations
9220  * Returns true if the display fits, false if not.
9221  */
9222 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9223 {
9224 	u32 lb_partitions = wm->lb_size / wm->src_width;
9225 	u32 line_time = wm->active_time + wm->blank_time;
9226 	u32 latency_tolerant_lines;
9227 	u32 latency_hiding;
9228 	fixed20_12 a;
9229 
9230 	a.full = dfixed_const(1);
9231 	if (wm->vsc.full > a.full)
9232 		latency_tolerant_lines = 1;
9233 	else {
9234 		if (lb_partitions <= (wm->vtaps + 1))
9235 			latency_tolerant_lines = 1;
9236 		else
9237 			latency_tolerant_lines = 2;
9238 	}
9239 
9240 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9241 
9242 	if (dce8_latency_watermark(wm) <= latency_hiding)
9243 		return true;
9244 	else
9245 		return false;
9246 }
9247 
9248 /**
9249  * dce8_program_watermarks - program display watermarks
9250  *
9251  * @rdev: radeon_device pointer
9252  * @radeon_crtc: the selected display controller
9253  * @lb_size: line buffer size
9254  * @num_heads: number of display controllers in use
9255  *
9256  * Calculate and program the display watermarks for the
9257  * selected display controller (CIK).
9258  */
9259 static void dce8_program_watermarks(struct radeon_device *rdev,
9260 				    struct radeon_crtc *radeon_crtc,
9261 				    u32 lb_size, u32 num_heads)
9262 {
9263 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9264 	struct dce8_wm_params wm_low, wm_high;
9265 	u32 active_time;
9266 	u32 line_time = 0;
9267 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9268 	u32 tmp, wm_mask;
9269 
9270 	if (radeon_crtc->base.enabled && num_heads && mode) {
9271 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9272 					    (u32)mode->clock);
9273 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9274 					  (u32)mode->clock);
9275 		line_time = min(line_time, (u32)65535);
9276 
9277 		/* watermark for high clocks */
9278 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9279 		    rdev->pm.dpm_enabled) {
9280 			wm_high.yclk =
9281 				radeon_dpm_get_mclk(rdev, false) * 10;
9282 			wm_high.sclk =
9283 				radeon_dpm_get_sclk(rdev, false) * 10;
9284 		} else {
9285 			wm_high.yclk = rdev->pm.current_mclk * 10;
9286 			wm_high.sclk = rdev->pm.current_sclk * 10;
9287 		}
9288 
9289 		wm_high.disp_clk = mode->clock;
9290 		wm_high.src_width = mode->crtc_hdisplay;
9291 		wm_high.active_time = active_time;
9292 		wm_high.blank_time = line_time - wm_high.active_time;
9293 		wm_high.interlaced = false;
9294 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9295 			wm_high.interlaced = true;
9296 		wm_high.vsc = radeon_crtc->vsc;
9297 		wm_high.vtaps = 1;
9298 		if (radeon_crtc->rmx_type != RMX_OFF)
9299 			wm_high.vtaps = 2;
9300 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9301 		wm_high.lb_size = lb_size;
9302 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9303 		wm_high.num_heads = num_heads;
9304 
9305 		/* set for high clocks */
9306 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9307 
9308 		/* possibly force display priority to high */
9309 		/* should really do this at mode validation time... */
9310 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9311 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9312 		    !dce8_check_latency_hiding(&wm_high) ||
9313 		    (rdev->disp_priority == 2)) {
9314 			DRM_DEBUG_KMS("force priority to high\n");
9315 		}
9316 
9317 		/* watermark for low clocks */
9318 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9319 		    rdev->pm.dpm_enabled) {
9320 			wm_low.yclk =
9321 				radeon_dpm_get_mclk(rdev, true) * 10;
9322 			wm_low.sclk =
9323 				radeon_dpm_get_sclk(rdev, true) * 10;
9324 		} else {
9325 			wm_low.yclk = rdev->pm.current_mclk * 10;
9326 			wm_low.sclk = rdev->pm.current_sclk * 10;
9327 		}
9328 
9329 		wm_low.disp_clk = mode->clock;
9330 		wm_low.src_width = mode->crtc_hdisplay;
9331 		wm_low.active_time = active_time;
9332 		wm_low.blank_time = line_time - wm_low.active_time;
9333 		wm_low.interlaced = false;
9334 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9335 			wm_low.interlaced = true;
9336 		wm_low.vsc = radeon_crtc->vsc;
9337 		wm_low.vtaps = 1;
9338 		if (radeon_crtc->rmx_type != RMX_OFF)
9339 			wm_low.vtaps = 2;
9340 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9341 		wm_low.lb_size = lb_size;
9342 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9343 		wm_low.num_heads = num_heads;
9344 
9345 		/* set for low clocks */
9346 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9347 
9348 		/* possibly force display priority to high */
9349 		/* should really do this at mode validation time... */
9350 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9351 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9352 		    !dce8_check_latency_hiding(&wm_low) ||
9353 		    (rdev->disp_priority == 2)) {
9354 			DRM_DEBUG_KMS("force priority to high\n");
9355 		}
9356 
9357 		/* Save number of lines the linebuffer leads before the scanout */
9358 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9359 	}
9360 
9361 	/* select wm A */
9362 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9363 	tmp = wm_mask;
9364 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9365 	tmp |= LATENCY_WATERMARK_MASK(1);
9366 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9367 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9368 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9369 		LATENCY_HIGH_WATERMARK(line_time)));
9370 	/* select wm B */
9371 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9372 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9373 	tmp |= LATENCY_WATERMARK_MASK(2);
9374 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9375 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9376 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9377 		LATENCY_HIGH_WATERMARK(line_time)));
9378 	/* restore original selection */
9379 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9380 
9381 	/* save values for DPM */
9382 	radeon_crtc->line_time = line_time;
9383 	radeon_crtc->wm_high = latency_watermark_a;
9384 	radeon_crtc->wm_low = latency_watermark_b;
9385 }
9386 
9387 /**
9388  * dce8_bandwidth_update - program display watermarks
9389  *
9390  * @rdev: radeon_device pointer
9391  *
9392  * Calculate and program the display watermarks and line
9393  * buffer allocation (CIK).
9394  */
9395 void dce8_bandwidth_update(struct radeon_device *rdev)
9396 {
9397 	struct drm_display_mode *mode = NULL;
9398 	u32 num_heads = 0, lb_size;
9399 	int i;
9400 
9401 	if (!rdev->mode_info.mode_config_initialized)
9402 		return;
9403 
9404 	radeon_update_display_priority(rdev);
9405 
9406 	for (i = 0; i < rdev->num_crtc; i++) {
9407 		if (rdev->mode_info.crtcs[i]->base.enabled)
9408 			num_heads++;
9409 	}
9410 	for (i = 0; i < rdev->num_crtc; i++) {
9411 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9412 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9413 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9414 	}
9415 }
9416 
9417 /**
9418  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9419  *
9420  * @rdev: radeon_device pointer
9421  *
9422  * Fetches a GPU clock counter snapshot (SI).
9423  * Returns the 64 bit clock counter snapshot.
9424  */
9425 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9426 {
9427 	uint64_t clock;
9428 
9429 	mutex_lock(&rdev->gpu_clock_mutex);
9430 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9431 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9432 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9433 	mutex_unlock(&rdev->gpu_clock_mutex);
9434 	return clock;
9435 }
9436 
9437 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9438 			     u32 cntl_reg, u32 status_reg)
9439 {
9440 	int r, i;
9441 	struct atom_clock_dividers dividers;
9442 	uint32_t tmp;
9443 
9444 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9445 					   clock, false, &dividers);
9446 	if (r)
9447 		return r;
9448 
9449 	tmp = RREG32_SMC(cntl_reg);
9450 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9451 	tmp |= dividers.post_divider;
9452 	WREG32_SMC(cntl_reg, tmp);
9453 
9454 	for (i = 0; i < 100; i++) {
9455 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9456 			break;
9457 		mdelay(10);
9458 	}
9459 	if (i == 100)
9460 		return -ETIMEDOUT;
9461 
9462 	return 0;
9463 }
9464 
9465 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9466 {
9467 	int r = 0;
9468 
9469 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9470 	if (r)
9471 		return r;
9472 
9473 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9474 	return r;
9475 }
9476 
9477 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9478 {
9479 	int r, i;
9480 	struct atom_clock_dividers dividers;
9481 	u32 tmp;
9482 
9483 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9484 					   ecclk, false, &dividers);
9485 	if (r)
9486 		return r;
9487 
9488 	for (i = 0; i < 100; i++) {
9489 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9490 			break;
9491 		mdelay(10);
9492 	}
9493 	if (i == 100)
9494 		return -ETIMEDOUT;
9495 
9496 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9497 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9498 	tmp |= dividers.post_divider;
9499 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9500 
9501 	for (i = 0; i < 100; i++) {
9502 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9503 			break;
9504 		mdelay(10);
9505 	}
9506 	if (i == 100)
9507 		return -ETIMEDOUT;
9508 
9509 	return 0;
9510 }
9511 
9512 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9513 {
9514 	struct pci_dev *root = rdev->pdev->bus->self;
9515 	int bridge_pos, gpu_pos;
9516 	u32 speed_cntl, mask, current_data_rate;
9517 	int ret, i;
9518 	u16 tmp16;
9519 
9520 	if (pci_is_root_bus(rdev->pdev->bus))
9521 		return;
9522 
9523 	if (radeon_pcie_gen2 == 0)
9524 		return;
9525 
9526 	if (rdev->flags & RADEON_IS_IGP)
9527 		return;
9528 
9529 	if (!(rdev->flags & RADEON_IS_PCIE))
9530 		return;
9531 
9532 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9533 	if (ret != 0)
9534 		return;
9535 
9536 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9537 		return;
9538 
9539 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9540 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9541 		LC_CURRENT_DATA_RATE_SHIFT;
9542 	if (mask & DRM_PCIE_SPEED_80) {
9543 		if (current_data_rate == 2) {
9544 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9545 			return;
9546 		}
9547 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9548 	} else if (mask & DRM_PCIE_SPEED_50) {
9549 		if (current_data_rate == 1) {
9550 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9551 			return;
9552 		}
9553 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9554 	}
9555 
9556 	bridge_pos = pci_pcie_cap(root);
9557 	if (!bridge_pos)
9558 		return;
9559 
9560 	gpu_pos = pci_pcie_cap(rdev->pdev);
9561 	if (!gpu_pos)
9562 		return;
9563 
9564 	if (mask & DRM_PCIE_SPEED_80) {
9565 		/* re-try equalization if gen3 is not already enabled */
9566 		if (current_data_rate != 2) {
9567 			u16 bridge_cfg, gpu_cfg;
9568 			u16 bridge_cfg2, gpu_cfg2;
9569 			u32 max_lw, current_lw, tmp;
9570 
9571 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9572 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9573 
9574 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9575 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9576 
9577 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9578 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9579 
9580 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9581 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9582 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9583 
9584 			if (current_lw < max_lw) {
9585 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9586 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9587 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9588 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9589 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9590 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9591 				}
9592 			}
9593 
9594 			for (i = 0; i < 10; i++) {
9595 				/* check status */
9596 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9597 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9598 					break;
9599 
9600 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9601 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9602 
9603 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9604 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9605 
9606 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9607 				tmp |= LC_SET_QUIESCE;
9608 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9609 
9610 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9611 				tmp |= LC_REDO_EQ;
9612 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9613 
9614 				mdelay(100);
9615 
9616 				/* linkctl */
9617 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9618 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9619 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9620 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9621 
9622 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9623 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9624 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9625 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9626 
9627 				/* linkctl2 */
9628 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9629 				tmp16 &= ~((1 << 4) | (7 << 9));
9630 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9631 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9632 
9633 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9634 				tmp16 &= ~((1 << 4) | (7 << 9));
9635 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9636 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9637 
9638 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9639 				tmp &= ~LC_SET_QUIESCE;
9640 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9641 			}
9642 		}
9643 	}
9644 
9645 	/* set the link speed */
9646 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9647 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9648 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9649 
9650 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9651 	tmp16 &= ~0xf;
9652 	if (mask & DRM_PCIE_SPEED_80)
9653 		tmp16 |= 3; /* gen3 */
9654 	else if (mask & DRM_PCIE_SPEED_50)
9655 		tmp16 |= 2; /* gen2 */
9656 	else
9657 		tmp16 |= 1; /* gen1 */
9658 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9659 
9660 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9661 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9662 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9663 
9664 	for (i = 0; i < rdev->usec_timeout; i++) {
9665 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9666 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9667 			break;
9668 		udelay(1);
9669 	}
9670 }
9671 
9672 static void cik_program_aspm(struct radeon_device *rdev)
9673 {
9674 	u32 data, orig;
9675 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9676 	bool disable_clkreq = false;
9677 
9678 	if (radeon_aspm == 0)
9679 		return;
9680 
9681 	/* XXX double check IGPs */
9682 	if (rdev->flags & RADEON_IS_IGP)
9683 		return;
9684 
9685 	if (!(rdev->flags & RADEON_IS_PCIE))
9686 		return;
9687 
9688 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9689 	data &= ~LC_XMIT_N_FTS_MASK;
9690 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9691 	if (orig != data)
9692 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9693 
9694 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9695 	data |= LC_GO_TO_RECOVERY;
9696 	if (orig != data)
9697 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9698 
9699 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9700 	data |= P_IGNORE_EDB_ERR;
9701 	if (orig != data)
9702 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9703 
9704 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9705 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9706 	data |= LC_PMI_TO_L1_DIS;
9707 	if (!disable_l0s)
9708 		data |= LC_L0S_INACTIVITY(7);
9709 
9710 	if (!disable_l1) {
9711 		data |= LC_L1_INACTIVITY(7);
9712 		data &= ~LC_PMI_TO_L1_DIS;
9713 		if (orig != data)
9714 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9715 
9716 		if (!disable_plloff_in_l1) {
9717 			bool clk_req_support;
9718 
9719 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9720 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9721 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9722 			if (orig != data)
9723 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9724 
9725 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9726 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9727 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9728 			if (orig != data)
9729 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9730 
9731 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9732 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9733 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9734 			if (orig != data)
9735 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9736 
9737 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9738 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9739 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9740 			if (orig != data)
9741 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9742 
9743 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9744 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9745 			data |= LC_DYN_LANES_PWR_STATE(3);
9746 			if (orig != data)
9747 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9748 
9749 			if (!disable_clkreq &&
9750 			    !pci_is_root_bus(rdev->pdev->bus)) {
9751 				struct pci_dev *root = rdev->pdev->bus->self;
9752 				u32 lnkcap;
9753 
9754 				clk_req_support = false;
9755 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9756 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9757 					clk_req_support = true;
9758 			} else {
9759 				clk_req_support = false;
9760 			}
9761 
9762 			if (clk_req_support) {
9763 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9764 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9765 				if (orig != data)
9766 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9767 
9768 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9769 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9770 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9771 				if (orig != data)
9772 					WREG32_SMC(THM_CLK_CNTL, data);
9773 
9774 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9775 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9776 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9777 				if (orig != data)
9778 					WREG32_SMC(MISC_CLK_CTRL, data);
9779 
9780 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9781 				data &= ~BCLK_AS_XCLK;
9782 				if (orig != data)
9783 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9784 
9785 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9786 				data &= ~FORCE_BIF_REFCLK_EN;
9787 				if (orig != data)
9788 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9789 
9790 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9791 				data &= ~MPLL_CLKOUT_SEL_MASK;
9792 				data |= MPLL_CLKOUT_SEL(4);
9793 				if (orig != data)
9794 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9795 			}
9796 		}
9797 	} else {
9798 		if (orig != data)
9799 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9800 	}
9801 
9802 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9803 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9804 	if (orig != data)
9805 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9806 
9807 	if (!disable_l0s) {
9808 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9809 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9810 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9811 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9812 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9813 				data &= ~LC_L0S_INACTIVITY_MASK;
9814 				if (orig != data)
9815 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9816 			}
9817 		}
9818 	}
9819 }
9820