/* drivers/gpu/drm/radeon/cik.c (openbmc/linux, revision 9cfc5c90) */
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

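/*
 * Note on the two naming conventions below: the lower-case firmware names
 * are the newer packaged ucode images (selected at load time via the
 * rdev->new_fw paths later in this file); the upper-case names are the
 * legacy images kept as a fallback.  Both sets are declared so either can
 * satisfy the module's firmware dependency.
 */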
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}
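
/*
 * Illustrative sketch only (not part of the driver): how a caller such as
 * the RADEON_INFO ioctl handler might consume the whitelist above.  The
 * variable names here are hypothetical.
 */
#if 0
	u32 value;

	if (cik_get_allowed_info_register(rdev, GRBM_STATUS, &value) == 0)
		DRM_INFO("GRBM_STATUS: 0x%08x\n", value);
	else
		DRM_ERROR("register not readable via the info ioctl\n");
#endif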

/*
 * Indirect register accessors (DIDT block)
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
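
/*
 * Illustrative sketch only: the accessors above implement the usual
 * index/data pattern, so a read-modify-write of an indirect DIDT register
 * looks like a plain RMW to the caller.  The register name and bit below
 * are placeholders, not values taken from this file.
 */
#if 0
	u32 tmp = cik_didt_rreg(rdev, DIDT_SQ_CTRL0);	/* hypothetical reg */

	tmp &= ~DIDT_CTRL_EN;				/* hypothetical bit */
	cik_didt_wreg(rdev, DIDT_SQ_CTRL0, tmp);
#endif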

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
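
/*
 * Worked example (sketch): with a raw SMC reading of 424 (0x1a8),
 * kv_get_temp() returns (424 / 8 - 49) * 1000 = 4000, i.e. 4 degrees C
 * expressed in millidegrees, the unit hwmon expects.  ci_get_temp()
 * likewise clamps any value with bit 9 set to 255 C before scaling.
 */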

/*
 * Indirect register accessors (PCIe port)
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to flush the posted index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to flush the posted index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* flush the data write before dropping the lock */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

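/*
 * The save/restore lists below are consumed by the RLC ucode.  Each entry
 * appears to encode an instance selector in the high 16 bits and a dword
 * register offset in the low 16 bits (hence the "reg >> 2"), followed by a
 * default value; the bare counts (0x3, 0x5) delimit sub-lists.  This
 * interpretation is inferred from the encoding, not documented here.
 */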
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

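/*
 * Each golden-register entry below is an {offset, and_mask, or_mask}
 * triplet: radeon_program_register_sequence() rewrites the bits selected
 * by and_mask with the bits from or_mask, or writes or_mask outright when
 * and_mask is 0xffffffff (see the sketch after cik_init_golden_registers()).
 */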
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}
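
/*
 * Simplified sketch of what radeon_program_register_sequence() (defined in
 * radeon_device.c) does with the triplet tables passed in above:
 */
#if 0
	for (i = 0; i < array_size; i += 3) {
		u32 reg = registers[i], and_mask = registers[i + 1];
		u32 or_mask = registers[i + 2], tmp;

		if (and_mask == 0xffffffff) {
			tmp = or_mask;		/* full-register write */
		} else {
			tmp = RREG32(reg);	/* masked read-modify-write */
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
#endif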

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
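
/*
 * Illustrative sketch only: a ring that owns a doorbell slot kicks the
 * hardware by writing its write pointer through the aperture.  The ring
 * field names follow this driver's conventions; the surrounding context is
 * hypothetical.
 */
#if 0
	if (ring->use_doorbell)
		cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
#endif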

#define BONAIRE_IO_MC_REGS_SIZE 36

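/*
 * The tables below are {io-debug index, data} pairs; ci_mc_load_microcode()
 * (further down) streams them through the MC's SEQ_IO_DEBUG index/data
 * registers before loading the ucode itself.
 */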
1767 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1768 {
1769 	{0x00000070, 0x04400000},
1770 	{0x00000071, 0x80c01803},
1771 	{0x00000072, 0x00004004},
1772 	{0x00000073, 0x00000100},
1773 	{0x00000074, 0x00ff0000},
1774 	{0x00000075, 0x34000000},
1775 	{0x00000076, 0x08000014},
1776 	{0x00000077, 0x00cc08ec},
1777 	{0x00000078, 0x00000400},
1778 	{0x00000079, 0x00000000},
1779 	{0x0000007a, 0x04090000},
1780 	{0x0000007c, 0x00000000},
1781 	{0x0000007e, 0x4408a8e8},
1782 	{0x0000007f, 0x00000304},
1783 	{0x00000080, 0x00000000},
1784 	{0x00000082, 0x00000001},
1785 	{0x00000083, 0x00000002},
1786 	{0x00000084, 0xf3e4f400},
1787 	{0x00000085, 0x052024e3},
1788 	{0x00000087, 0x00000000},
1789 	{0x00000088, 0x01000000},
1790 	{0x0000008a, 0x1c0a0000},
1791 	{0x0000008b, 0xff010000},
1792 	{0x0000008d, 0xffffefff},
1793 	{0x0000008e, 0xfff3efff},
1794 	{0x0000008f, 0xfff3efbf},
1795 	{0x00000092, 0xf7ffffff},
1796 	{0x00000093, 0xffffff7f},
1797 	{0x00000095, 0x00101101},
1798 	{0x00000096, 0x00000fff},
1799 	{0x00000097, 0x00116fff},
1800 	{0x00000098, 0x60010000},
1801 	{0x00000099, 0x10010000},
1802 	{0x0000009a, 0x00006000},
1803 	{0x0000009b, 0x00001000},
1804 	{0x0000009f, 0x00b48000}
1805 };
1806 
1807 #define HAWAII_IO_MC_REGS_SIZE 22
1808 
1809 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1810 {
1811 	{0x0000007d, 0x40000000},
1812 	{0x0000007e, 0x40180304},
1813 	{0x0000007f, 0x0000ff00},
1814 	{0x00000081, 0x00000000},
1815 	{0x00000083, 0x00000800},
1816 	{0x00000086, 0x00000000},
1817 	{0x00000087, 0x00000100},
1818 	{0x00000088, 0x00020100},
1819 	{0x00000089, 0x00000000},
1820 	{0x0000008b, 0x00040000},
1821 	{0x0000008c, 0x00000100},
1822 	{0x0000008e, 0xff010000},
1823 	{0x00000090, 0xffffefff},
1824 	{0x00000091, 0xfff3efff},
1825 	{0x00000092, 0xfff3efbf},
1826 	{0x00000093, 0xf7ffffff},
1827 	{0x00000094, 0xffffff7f},
1828 	{0x00000095, 0x00000fff},
1829 	{0x00000096, 0x00116fff},
1830 	{0x00000097, 0x60010000},
1831 	{0x00000098, 0x10010000},
1832 	{0x0000009f, 0x00c79000}
1833 };
1834 
1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
1845  * Switches the currently active register instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853 			     MEID(me & 0x3) |
1854 			     VMID(vmid & 0xf) |
1855 			     QUEUEID(queue & 0x7));
1856 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
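
/*
 * Callers serialize SRBM_GFX_CNTL updates on rdev->srbm_mutex and restore
 * the default instance when done; a typical sequence looks roughly like
 * this (sketch):
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the instanced (e.g. HQD) registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */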
1858 
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
1868 int ci_mc_load_microcode(struct radeon_device *rdev)
1869 {
1870 	const __be32 *fw_data = NULL;
1871 	const __le32 *new_fw_data = NULL;
1872 	u32 running, tmp;
1873 	u32 *io_mc_regs = NULL;
1874 	const __le32 *new_io_mc_regs = NULL;
1875 	int i, regs_size, ucode_size;
1876 
1877 	if (!rdev->mc_fw)
1878 		return -EINVAL;
1879 
1880 	if (rdev->new_fw) {
1881 		const struct mc_firmware_header_v1_0 *hdr =
1882 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883 
1884 		radeon_ucode_print_mc_hdr(&hdr->header);
1885 
1886 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887 		new_io_mc_regs = (const __le32 *)
1888 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890 		new_fw_data = (const __le32 *)
1891 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892 	} else {
1893 		ucode_size = rdev->mc_fw->size / 4;
1894 
1895 		switch (rdev->family) {
1896 		case CHIP_BONAIRE:
1897 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899 			break;
1900 		case CHIP_HAWAII:
1901 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1903 			break;
1904 		default:
1905 			return -EINVAL;
1906 		}
1907 		fw_data = (const __be32 *)rdev->mc_fw->data;
1908 	}
1909 
1910 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911 
1912 	if (running == 0) {
1917 
1918 		/* reset the engine and set to writable */
1919 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921 
1922 		/* load mc io regs */
1923 		for (i = 0; i < regs_size; i++) {
1924 			if (rdev->new_fw) {
1925 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927 			} else {
1928 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930 			}
1931 		}
1932 
1933 		tmp = RREG32(MC_SEQ_MISC0);
1934 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939 		}
1940 
1941 		/* load the MC ucode */
1942 		for (i = 0; i < ucode_size; i++) {
1943 			if (rdev->new_fw)
1944 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945 			else
1946 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947 		}
1948 
1949 		/* put the engine back into the active state */
1950 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953 
1954 		/* wait for training to complete */
1955 		for (i = 0; i < rdev->usec_timeout; i++) {
1956 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957 				break;
1958 			udelay(1);
1959 		}
1960 		for (i = 0; i < rdev->usec_timeout; i++) {
1961 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962 				break;
1963 			udelay(1);
1964 		}
1968 	}
1969 
1970 	return 0;
1971 }
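
/*
 * Only meaningful on dGPUs (Bonaire/Hawaii): APUs carry no MC firmware,
 * so rdev->mc_fw stays NULL there (see the RADEON_IS_IGP check in
 * cik_init_microcode()) and this returns -EINVAL if called anyway.
 */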
1972 
1973 /**
1974  * cik_init_microcode - load ucode images from disk
1975  *
1976  * @rdev: radeon_device pointer
1977  *
1978  * Use the firmware interface to load the ucode images into
1979  * the driver (not loaded into hw).
1980  * Returns 0 on success, error on failure.
1981  */
1982 static int cik_init_microcode(struct radeon_device *rdev)
1983 {
1984 	const char *chip_name;
1985 	const char *new_chip_name;
1986 	size_t pfp_req_size, me_req_size, ce_req_size,
1987 		mec_req_size, rlc_req_size, mc_req_size = 0,
1988 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989 	char fw_name[30];
1990 	int new_fw = 0;
1991 	int err;
1992 	int num_fw;
1993 
1994 	DRM_DEBUG("\n");
1995 
1996 	switch (rdev->family) {
1997 	case CHIP_BONAIRE:
1998 		chip_name = "BONAIRE";
1999 		new_chip_name = "bonaire";
2000 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2002 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009 		num_fw = 8;
2010 		break;
2011 	case CHIP_HAWAII:
2012 		chip_name = "HAWAII";
2013 		new_chip_name = "hawaii";
2014 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023 		num_fw = 8;
2024 		break;
2025 	case CHIP_KAVERI:
2026 		chip_name = "KAVERI";
2027 		new_chip_name = "kaveri";
2028 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034 		num_fw = 7;
2035 		break;
2036 	case CHIP_KABINI:
2037 		chip_name = "KABINI";
2038 		new_chip_name = "kabini";
2039 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045 		num_fw = 6;
2046 		break;
2047 	case CHIP_MULLINS:
2048 		chip_name = "MULLINS";
2049 		new_chip_name = "mullins";
2050 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056 		num_fw = 6;
2057 		break;
2058 	default: BUG();
2059 	}
2060 
2061 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062 
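	/*
	 * Each image below is fetched with the same two-step pattern: try
	 * the new-style lowercase name and validate it with
	 * radeon_ucode_validate() (counted in new_fw), otherwise fall back
	 * to the legacy uppercase image, whose only sanity check is an
	 * exact size match against the *_req_size values computed above.
	 */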
2063 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065 	if (err) {
2066 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068 		if (err)
2069 			goto out;
2070 		if (rdev->pfp_fw->size != pfp_req_size) {
2071 			printk(KERN_ERR
2072 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073 			       rdev->pfp_fw->size, fw_name);
2074 			err = -EINVAL;
2075 			goto out;
2076 		}
2077 	} else {
2078 		err = radeon_ucode_validate(rdev->pfp_fw);
2079 		if (err) {
2080 			printk(KERN_ERR
2081 			       "cik_fw: validation failed for firmware \"%s\"\n",
2082 			       fw_name);
2083 			goto out;
2084 		} else {
2085 			new_fw++;
2086 		}
2087 	}
2088 
2089 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->me_fw->size != me_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->me_fw->size, fw_name);
2100 			err = -EINVAL;
			goto out;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->me_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->ce_fw->size != ce_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->ce_fw->size, fw_name);
2125 			err = -EINVAL;
			goto out;
2126 		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->ce_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141 	if (err) {
2142 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144 		if (err)
2145 			goto out;
2146 		if (rdev->mec_fw->size != mec_req_size) {
2147 			printk(KERN_ERR
2148 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149 			       rdev->mec_fw->size, fw_name);
2150 			err = -EINVAL;
			goto out;
2151 		}
2152 	} else {
2153 		err = radeon_ucode_validate(rdev->mec_fw);
2154 		if (err) {
2155 			printk(KERN_ERR
2156 			       "cik_fw: validation failed for firmware \"%s\"\n",
2157 			       fw_name);
2158 			goto out;
2159 		} else {
2160 			new_fw++;
2161 		}
2162 	}
2163 
2164 	if (rdev->family == CHIP_KAVERI) {
2165 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167 		if (err) {
2168 			goto out;
2169 		} else {
2170 			err = radeon_ucode_validate(rdev->mec2_fw);
2171 			if (err) {
2172 				goto out;
2173 			} else {
2174 				new_fw++;
2175 			}
2176 		}
2177 	}
2178 
2179 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181 	if (err) {
2182 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184 		if (err)
2185 			goto out;
2186 		if (rdev->rlc_fw->size != rlc_req_size) {
2187 			printk(KERN_ERR
2188 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189 			       rdev->rlc_fw->size, fw_name);
2190 			err = -EINVAL;
			goto out;
2191 		}
2192 	} else {
2193 		err = radeon_ucode_validate(rdev->rlc_fw);
2194 		if (err) {
2195 			printk(KERN_ERR
2196 			       "cik_fw: validation failed for firmware \"%s\"\n",
2197 			       fw_name);
2198 			goto out;
2199 		} else {
2200 			new_fw++;
2201 		}
2202 	}
2203 
2204 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206 	if (err) {
2207 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209 		if (err)
2210 			goto out;
2211 		if (rdev->sdma_fw->size != sdma_req_size) {
2212 			printk(KERN_ERR
2213 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214 			       rdev->sdma_fw->size, fw_name);
2215 			err = -EINVAL;
			goto out;
2216 		}
2217 	} else {
2218 		err = radeon_ucode_validate(rdev->sdma_fw);
2219 		if (err) {
2220 			printk(KERN_ERR
2221 			       "cik_fw: validation failed for firmware \"%s\"\n",
2222 			       fw_name);
2223 			goto out;
2224 		} else {
2225 			new_fw++;
2226 		}
2227 	}
2228 
2229 	/* No SMC, MC ucode on APUs */
2230 	if (!(rdev->flags & RADEON_IS_IGP)) {
2231 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233 		if (err) {
2234 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236 			if (err) {
2237 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239 				if (err)
2240 					goto out;
2241 			}
2242 			if ((rdev->mc_fw->size != mc_req_size) &&
2243 			    (rdev->mc_fw->size != mc2_req_size)) {
2244 				printk(KERN_ERR
2245 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246 				       rdev->mc_fw->size, fw_name);
2247 				err = -EINVAL;
				goto out;
2248 			}
2249 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250 		} else {
2251 			err = radeon_ucode_validate(rdev->mc_fw);
2252 			if (err) {
2253 				printk(KERN_ERR
2254 				       "cik_fw: validation failed for firmware \"%s\"\n",
2255 				       fw_name);
2256 				goto out;
2257 			} else {
2258 				new_fw++;
2259 			}
2260 		}
2261 
2262 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264 		if (err) {
2265 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267 			if (err) {
2268 				printk(KERN_ERR
2269 				       "smc: error loading firmware \"%s\"\n",
2270 				       fw_name);
2271 				release_firmware(rdev->smc_fw);
2272 				rdev->smc_fw = NULL;
2273 				err = 0;
2274 			} else if (rdev->smc_fw->size != smc_req_size) {
2275 				printk(KERN_ERR
2276 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277 				       rdev->smc_fw->size, fw_name);
2278 				err = -EINVAL;
2279 			}
2280 		} else {
2281 			err = radeon_ucode_validate(rdev->smc_fw);
2282 			if (err) {
2283 				printk(KERN_ERR
2284 				       "cik_fw: validation failed for firmware \"%s\"\n",
2285 				       fw_name);
2286 				goto out;
2287 			} else {
2288 				new_fw++;
2289 			}
2290 		}
2291 	}
2292 
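	/*
	 * New-style and legacy images must not be mixed: either all num_fw
	 * images validated as new-style above, or none of them did.
	 */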
2293 	if (new_fw == 0) {
2294 		rdev->new_fw = false;
2295 	} else if (new_fw < num_fw) {
2296 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297 		err = -EINVAL;
2298 	} else {
2299 		rdev->new_fw = true;
2300 	}
2301 
2302 out:
2303 	if (err) {
2304 		if (err != -EINVAL)
2305 			printk(KERN_ERR
2306 			       "cik_cp: Failed to load firmware \"%s\"\n",
2307 			       fw_name);
2308 		release_firmware(rdev->pfp_fw);
2309 		rdev->pfp_fw = NULL;
2310 		release_firmware(rdev->me_fw);
2311 		rdev->me_fw = NULL;
2312 		release_firmware(rdev->ce_fw);
2313 		rdev->ce_fw = NULL;
2314 		release_firmware(rdev->mec_fw);
2315 		rdev->mec_fw = NULL;
2316 		release_firmware(rdev->mec2_fw);
2317 		rdev->mec2_fw = NULL;
2318 		release_firmware(rdev->rlc_fw);
2319 		rdev->rlc_fw = NULL;
2320 		release_firmware(rdev->sdma_fw);
2321 		rdev->sdma_fw = NULL;
2322 		release_firmware(rdev->mc_fw);
2323 		rdev->mc_fw = NULL;
2324 		release_firmware(rdev->smc_fw);
2325 		rdev->smc_fw = NULL;
2326 	}
2327 	return err;
2328 }
2329 
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346 	const u32 num_tile_mode_states = 32;
2347 	const u32 num_secondary_tile_mode_states = 16;
2348 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2349 	u32 num_pipe_configs;
2350 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2351 		rdev->config.cik.max_shader_engines;
2352 
2353 	switch (rdev->config.cik.mem_row_size_in_kb) {
2354 	case 1:
2355 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2356 		break;
2357 	case 2:
2358 	default:
2359 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2360 		break;
2361 	case 4:
2362 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2363 		break;
2364 	}
2365 
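	/*
	 * Tables exist for 2-, 4-, 8- and 16-pipe parts; anything wider
	 * than 8 pipes (Hawaii) uses the 16-pipe table.  Every value
	 * written below is also mirrored into
	 * rdev->config.cik.tile_mode_array[]/macrotile_mode_array[] so the
	 * CS checker and userspace (via the radeon_info ioctl) see exactly
	 * what the hardware was programmed with.
	 */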
2366 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2367 	if (num_pipe_configs > 8)
2368 		num_pipe_configs = 16;
2369 
2370 	if (num_pipe_configs == 16) {
2371 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2372 			switch (reg_offset) {
2373 			case 0:
2374 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2378 				break;
2379 			case 1:
2380 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2384 				break;
2385 			case 2:
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2390 				break;
2391 			case 3:
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2396 				break;
2397 			case 4:
2398 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 						 TILE_SPLIT(split_equal_to_row_size));
2402 				break;
2403 			case 5:
2404 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407 				break;
2408 			case 6:
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2410 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2413 				break;
2414 			case 7:
2415 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2416 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2417 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 						 TILE_SPLIT(split_equal_to_row_size));
2419 				break;
2420 			case 8:
2421 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423 				break;
2424 			case 9:
2425 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2428 				break;
2429 			case 10:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			case 11:
2436 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 				break;
2441 			case 12:
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 				break;
2447 			case 13:
2448 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2451 				break;
2452 			case 14:
2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 				break;
2458 			case 16:
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2462 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 				break;
2464 			case 17:
2465 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2466 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2469 				break;
2470 			case 27:
2471 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2472 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2474 				break;
2475 			case 28:
2476 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 				break;
2481 			case 29:
2482 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2485 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2486 				break;
2487 			case 30:
2488 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 				break;
2493 			default:
2494 				gb_tile_moden = 0;
2495 				break;
2496 			}
2497 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2498 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2499 		}
2500 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2501 			switch (reg_offset) {
2502 			case 0:
2503 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506 						 NUM_BANKS(ADDR_SURF_16_BANK));
2507 				break;
2508 			case 1:
2509 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512 						 NUM_BANKS(ADDR_SURF_16_BANK));
2513 				break;
2514 			case 2:
2515 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518 						 NUM_BANKS(ADDR_SURF_16_BANK));
2519 				break;
2520 			case 3:
2521 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 						 NUM_BANKS(ADDR_SURF_16_BANK));
2525 				break;
2526 			case 4:
2527 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530 						 NUM_BANKS(ADDR_SURF_8_BANK));
2531 				break;
2532 			case 5:
2533 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536 						 NUM_BANKS(ADDR_SURF_4_BANK));
2537 				break;
2538 			case 6:
2539 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2542 						 NUM_BANKS(ADDR_SURF_2_BANK));
2543 				break;
2544 			case 8:
2545 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 						 NUM_BANKS(ADDR_SURF_16_BANK));
2549 				break;
2550 			case 9:
2551 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554 						 NUM_BANKS(ADDR_SURF_16_BANK));
2555 				break;
2556 			case 10:
2557 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560 						 NUM_BANKS(ADDR_SURF_16_BANK));
2561 				break;
2562 			case 11:
2563 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566 						 NUM_BANKS(ADDR_SURF_8_BANK));
2567 				break;
2568 			case 12:
2569 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2571 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572 						 NUM_BANKS(ADDR_SURF_4_BANK));
2573 				break;
2574 			case 13:
2575 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578 						 NUM_BANKS(ADDR_SURF_2_BANK));
2579 				break;
2580 			case 14:
2581 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2583 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2584 						 NUM_BANKS(ADDR_SURF_2_BANK));
2585 				break;
2586 			default:
2587 				gb_tile_moden = 0;
2588 				break;
2589 			}
2590 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2591 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2592 		}
2593 	} else if (num_pipe_configs == 8) {
2594 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2595 			switch (reg_offset) {
2596 			case 0:
2597 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2599 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2601 				break;
2602 			case 1:
2603 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2607 				break;
2608 			case 2:
2609 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2611 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2613 				break;
2614 			case 3:
2615 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2617 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2619 				break;
2620 			case 4:
2621 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2623 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624 						 TILE_SPLIT(split_equal_to_row_size));
2625 				break;
2626 			case 5:
2627 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2628 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2630 				break;
2631 			case 6:
2632 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2634 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2636 				break;
2637 			case 7:
2638 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2640 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 						 TILE_SPLIT(split_equal_to_row_size));
2642 				break;
2643 			case 8:
2644 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646 				break;
2647 			case 9:
2648 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2649 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2651 				break;
2652 			case 10:
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2655 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 				break;
2658 			case 11:
2659 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2660 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663 				break;
2664 			case 12:
2665 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2667 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669 				break;
2670 			case 13:
2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2672 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2673 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2674 				break;
2675 			case 14:
2676 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680 				break;
2681 			case 16:
2682 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686 				break;
2687 			case 17:
2688 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2690 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692 				break;
2693 			case 27:
2694 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2697 				break;
2698 			case 28:
2699 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2702 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703 				break;
2704 			case 29:
2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2707 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709 				break;
2710 			case 30:
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2714 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 				break;
2716 			default:
2717 				gb_tile_moden = 0;
2718 				break;
2719 			}
2720 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2721 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2722 		}
2723 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2724 			switch (reg_offset) {
2725 			case 0:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK));
2730 				break;
2731 			case 1:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK));
2736 				break;
2737 			case 2:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK));
2742 				break;
2743 			case 3:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747 						 NUM_BANKS(ADDR_SURF_16_BANK));
2748 				break;
2749 			case 4:
2750 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753 						 NUM_BANKS(ADDR_SURF_8_BANK));
2754 				break;
2755 			case 5:
2756 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759 						 NUM_BANKS(ADDR_SURF_4_BANK));
2760 				break;
2761 			case 6:
2762 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765 						 NUM_BANKS(ADDR_SURF_2_BANK));
2766 				break;
2767 			case 8:
2768 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2770 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771 						 NUM_BANKS(ADDR_SURF_16_BANK));
2772 				break;
2773 			case 9:
2774 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777 						 NUM_BANKS(ADDR_SURF_16_BANK));
2778 				break;
2779 			case 10:
2780 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783 						 NUM_BANKS(ADDR_SURF_16_BANK));
2784 				break;
2785 			case 11:
2786 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789 						 NUM_BANKS(ADDR_SURF_16_BANK));
2790 				break;
2791 			case 12:
2792 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2795 						 NUM_BANKS(ADDR_SURF_8_BANK));
2796 				break;
2797 			case 13:
2798 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2801 						 NUM_BANKS(ADDR_SURF_4_BANK));
2802 				break;
2803 			case 14:
2804 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2807 						 NUM_BANKS(ADDR_SURF_2_BANK));
2808 				break;
2809 			default:
2810 				gb_tile_moden = 0;
2811 				break;
2812 			}
2813 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2814 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2815 		}
2816 	} else if (num_pipe_configs == 4) {
2817 		if (num_rbs == 4) {
2818 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2819 				switch (reg_offset) {
2820 				case 0:
2821 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2823 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2825 					break;
2826 				case 1:
2827 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2831 					break;
2832 				case 2:
2833 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2835 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2837 					break;
2838 				case 3:
2839 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2841 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2843 					break;
2844 				case 4:
2845 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2847 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848 							 TILE_SPLIT(split_equal_to_row_size));
2849 					break;
2850 				case 5:
2851 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854 					break;
2855 				case 6:
2856 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2858 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2860 					break;
2861 				case 7:
2862 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2864 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 							 TILE_SPLIT(split_equal_to_row_size));
2866 					break;
2867 				case 8:
2868 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2869 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2870 					break;
2871 				case 9:
2872 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2875 					break;
2876 				case 10:
2877 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881 					break;
2882 				case 11:
2883 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2885 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887 					break;
2888 				case 12:
2889 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2891 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893 					break;
2894 				case 13:
2895 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2898 					break;
2899 				case 14:
2900 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904 					break;
2905 				case 16:
2906 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2909 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910 					break;
2911 				case 17:
2912 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916 					break;
2917 				case 27:
2918 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2921 					break;
2922 				case 28:
2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2925 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 					break;
2928 				case 29:
2929 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933 					break;
2934 				case 30:
2935 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2936 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2937 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2938 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 					break;
2940 				default:
2941 					gb_tile_moden = 0;
2942 					break;
2943 				}
2944 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2945 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2946 			}
2947 		} else if (num_rbs < 4) {
2948 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2949 				switch (reg_offset) {
2950 				case 0:
2951 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2953 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2955 					break;
2956 				case 1:
2957 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2959 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2961 					break;
2962 				case 2:
2963 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2965 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2967 					break;
2968 				case 3:
2969 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2970 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2971 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2972 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2973 					break;
2974 				case 4:
2975 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2977 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978 							 TILE_SPLIT(split_equal_to_row_size));
2979 					break;
2980 				case 5:
2981 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2982 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2984 					break;
2985 				case 6:
2986 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2988 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2990 					break;
2991 				case 7:
2992 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2994 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2995 							 TILE_SPLIT(split_equal_to_row_size));
2996 					break;
2997 				case 8:
2998 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2999 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
3000 					break;
3001 				case 9:
3002 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3005 					break;
3006 				case 10:
3007 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3009 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011 					break;
3012 				case 11:
3013 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3016 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 					break;
3018 				case 12:
3019 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3020 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3022 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3023 					break;
3024 				case 13:
3025 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3026 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3027 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3028 					break;
3029 				case 14:
3030 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3031 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3033 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034 					break;
3035 				case 16:
3036 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3039 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040 					break;
3041 				case 17:
3042 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3043 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3045 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046 					break;
3047 				case 27:
3048 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3049 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3050 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3051 					break;
3052 				case 28:
3053 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3054 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3056 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057 					break;
3058 				case 29:
3059 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3061 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3062 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3063 					break;
3064 				case 30:
3065 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3066 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3067 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3068 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 					break;
3070 				default:
3071 					gb_tile_moden = 0;
3072 					break;
3073 				}
3074 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3075 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3076 			}
3077 		}
3078 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3079 			switch (reg_offset) {
3080 			case 0:
3081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084 						 NUM_BANKS(ADDR_SURF_16_BANK));
3085 				break;
3086 			case 1:
3087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090 						 NUM_BANKS(ADDR_SURF_16_BANK));
3091 				break;
3092 			case 2:
3093 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096 						 NUM_BANKS(ADDR_SURF_16_BANK));
3097 				break;
3098 			case 3:
3099 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3102 						 NUM_BANKS(ADDR_SURF_16_BANK));
3103 				break;
3104 			case 4:
3105 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108 						 NUM_BANKS(ADDR_SURF_16_BANK));
3109 				break;
3110 			case 5:
3111 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3114 						 NUM_BANKS(ADDR_SURF_8_BANK));
3115 				break;
3116 			case 6:
3117 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120 						 NUM_BANKS(ADDR_SURF_4_BANK));
3121 				break;
3122 			case 8:
3123 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3124 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3125 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126 						 NUM_BANKS(ADDR_SURF_16_BANK));
3127 				break;
3128 			case 9:
3129 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3131 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132 						 NUM_BANKS(ADDR_SURF_16_BANK));
3133 				break;
3134 			case 10:
3135 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138 						 NUM_BANKS(ADDR_SURF_16_BANK));
3139 				break;
3140 			case 11:
3141 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3143 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144 						 NUM_BANKS(ADDR_SURF_16_BANK));
3145 				break;
3146 			case 12:
3147 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150 						 NUM_BANKS(ADDR_SURF_16_BANK));
3151 				break;
3152 			case 13:
3153 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3156 						 NUM_BANKS(ADDR_SURF_8_BANK));
3157 				break;
3158 			case 14:
3159 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162 						 NUM_BANKS(ADDR_SURF_4_BANK));
3163 				break;
3164 			default:
3165 				gb_tile_moden = 0;
3166 				break;
3167 			}
3168 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3169 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3170 		}
3171 	} else if (num_pipe_configs == 2) {
3172 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3173 			switch (reg_offset) {
3174 			case 0:
3175 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3177 						 PIPE_CONFIG(ADDR_SURF_P2) |
3178 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3179 				break;
3180 			case 1:
3181 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3183 						 PIPE_CONFIG(ADDR_SURF_P2) |
3184 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3185 				break;
3186 			case 2:
3187 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3188 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3189 						 PIPE_CONFIG(ADDR_SURF_P2) |
3190 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3191 				break;
3192 			case 3:
3193 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3194 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3195 						 PIPE_CONFIG(ADDR_SURF_P2) |
3196 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3197 				break;
3198 			case 4:
3199 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3201 						 PIPE_CONFIG(ADDR_SURF_P2) |
3202 						 TILE_SPLIT(split_equal_to_row_size));
3203 				break;
3204 			case 5:
3205 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206 						 PIPE_CONFIG(ADDR_SURF_P2) |
3207 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3208 				break;
3209 			case 6:
3210 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3212 						 PIPE_CONFIG(ADDR_SURF_P2) |
3213 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3214 				break;
3215 			case 7:
3216 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3217 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3218 						 PIPE_CONFIG(ADDR_SURF_P2) |
3219 						 TILE_SPLIT(split_equal_to_row_size));
3220 				break;
3221 			case 8:
3222 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3223 						 PIPE_CONFIG(ADDR_SURF_P2));
3224 				break;
3225 			case 9:
3226 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3228 						 PIPE_CONFIG(ADDR_SURF_P2));
3229 				break;
3230 			case 10:
3231 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3233 						 PIPE_CONFIG(ADDR_SURF_P2) |
3234 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 				break;
3236 			case 11:
3237 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239 						 PIPE_CONFIG(ADDR_SURF_P2) |
3240 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3241 				break;
3242 			case 12:
3243 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3244 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245 						 PIPE_CONFIG(ADDR_SURF_P2) |
3246 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247 				break;
3248 			case 13:
3249 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250 						 PIPE_CONFIG(ADDR_SURF_P2) |
3251 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3252 				break;
3253 			case 14:
3254 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256 						 PIPE_CONFIG(ADDR_SURF_P2) |
3257 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258 				break;
3259 			case 16:
3260 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262 						 PIPE_CONFIG(ADDR_SURF_P2) |
3263 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264 				break;
3265 			case 17:
3266 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3267 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268 						 PIPE_CONFIG(ADDR_SURF_P2) |
3269 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270 				break;
3271 			case 27:
3272 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3274 						 PIPE_CONFIG(ADDR_SURF_P2));
3275 				break;
3276 			case 28:
3277 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3278 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279 						 PIPE_CONFIG(ADDR_SURF_P2) |
3280 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3281 				break;
3282 			case 29:
3283 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3285 						 PIPE_CONFIG(ADDR_SURF_P2) |
3286 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287 				break;
3288 			case 30:
3289 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3290 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3291 						 PIPE_CONFIG(ADDR_SURF_P2) |
3292 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293 				break;
3294 			default:
3295 				gb_tile_moden = 0;
3296 				break;
3297 			}
3298 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3299 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3300 		}
3301 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3302 			switch (reg_offset) {
3303 			case 0:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 1:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313 						 NUM_BANKS(ADDR_SURF_16_BANK));
3314 				break;
3315 			case 2:
3316 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 						 NUM_BANKS(ADDR_SURF_16_BANK));
3320 				break;
3321 			case 3:
3322 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3325 						 NUM_BANKS(ADDR_SURF_16_BANK));
3326 				break;
3327 			case 4:
3328 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331 						 NUM_BANKS(ADDR_SURF_16_BANK));
3332 				break;
3333 			case 5:
3334 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337 						 NUM_BANKS(ADDR_SURF_16_BANK));
3338 				break;
3339 			case 6:
3340 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343 						 NUM_BANKS(ADDR_SURF_8_BANK));
3344 				break;
3345 			case 8:
3346 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3347 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3348 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349 						 NUM_BANKS(ADDR_SURF_16_BANK));
3350 				break;
3351 			case 9:
3352 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3353 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355 						 NUM_BANKS(ADDR_SURF_16_BANK));
3356 				break;
3357 			case 10:
3358 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3359 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3360 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361 						 NUM_BANKS(ADDR_SURF_16_BANK));
3362 				break;
3363 			case 11:
3364 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367 						 NUM_BANKS(ADDR_SURF_16_BANK));
3368 				break;
3369 			case 12:
3370 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373 						 NUM_BANKS(ADDR_SURF_16_BANK));
3374 				break;
3375 			case 13:
3376 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379 						 NUM_BANKS(ADDR_SURF_16_BANK));
3380 				break;
3381 			case 14:
3382 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3384 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3385 						 NUM_BANKS(ADDR_SURF_8_BANK));
3386 				break;
3387 			default:
3388 				gb_tile_moden = 0;
3389 				break;
3390 			}
3391 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3392 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3393 		}
3394 	} else
3395 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3396 }
3397 
3398 /**
3399  * cik_select_se_sh - select which SE, SH to address
3400  *
3401  * @rdev: radeon_device pointer
3402  * @se_num: shader engine to address
3403  * @sh_num: sh block to address
3404  *
3405  * Select which SE, SH combinations to address. Certain
3406  * registers are instanced per SE or SH.  0xffffffff means
3407  * broadcast to all SEs or SHs (CIK).
3408  */
3409 static void cik_select_se_sh(struct radeon_device *rdev,
3410 			     u32 se_num, u32 sh_num)
3411 {
3412 	u32 data = INSTANCE_BROADCAST_WRITES;
3413 
3414 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3415 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3416 	else if (se_num == 0xffffffff)
3417 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3418 	else if (sh_num == 0xffffffff)
3419 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3420 	else
3421 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3422 	WREG32(GRBM_GFX_INDEX, data);
3423 }
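/*
 * Illustrative usage (a sketch, mirroring what cik_setup_rb() below
 * actually does): select one SE/SH, read an instanced register, then
 * restore broadcast so later writes reach every instance.
 *
 *	mutex_lock(&rdev->grbm_idx_mutex);
 *	cik_select_se_sh(rdev, se, sh);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *	mutex_unlock(&rdev->grbm_idx_mutex);
 */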
3424 
3425 /**
3426  * cik_create_bitmask - create a bitmask
3427  *
3428  * @bit_width: length of the mask
3429  *
3430  * create a variable length bit mask (CIK).
3431  * Returns the bitmask.
3432  */
3433 static u32 cik_create_bitmask(u32 bit_width)
3434 {
3435 	u32 i, mask = 0;
3436 
3437 	for (i = 0; i < bit_width; i++) {
3438 		mask <<= 1;
3439 		mask |= 1;
3440 	}
3441 	return mask;
3442 }
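/*
 * Equivalent closed form (a sketch; assumes bit_width < 32, which holds
 * for the per-SH RB counts passed in below):
 *
 *	mask = (1U << bit_width) - 1;
 *
 * e.g. bit_width = 4 yields 0xf.
 */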
3443 
3444 /**
3445  * cik_get_rb_disabled - computes the mask of disabled RBs
3446  *
3447  * @rdev: radeon_device pointer
3448  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3449  * @sh_per_se: number of SH blocks per SE for the asic
3451  *
3452  * Calculates the bitmask of disabled RBs (CIK).
3453  * Returns the disabled RB bitmask.
3454  */
3455 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3456 			      u32 max_rb_num_per_se,
3457 			      u32 sh_per_se)
3458 {
3459 	u32 data, mask;
3460 
3461 	data = RREG32(CC_RB_BACKEND_DISABLE);
3462 	if (data & 1)
3463 		data &= BACKEND_DISABLE_MASK;
3464 	else
3465 		data = 0;
3466 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3467 
3468 	data >>= BACKEND_DISABLE_SHIFT;
3469 
3470 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3471 
3472 	return data & mask;
3473 }
3474 
3475 /**
3476  * cik_setup_rb - setup the RBs on the asic
3477  *
3478  * @rdev: radeon_device pointer
3479  * @se_num: number of SEs (shader engines) for the asic
3480  * @sh_per_se: number of SH blocks per SE for the asic
3481  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3482  *
3483  * Configures per-SE/SH RB registers (CIK).
3484  */
3485 static void cik_setup_rb(struct radeon_device *rdev,
3486 			 u32 se_num, u32 sh_per_se,
3487 			 u32 max_rb_num_per_se)
3488 {
3489 	int i, j;
3490 	u32 data, mask;
3491 	u32 disabled_rbs = 0;
3492 	u32 enabled_rbs = 0;
3493 
3494 	mutex_lock(&rdev->grbm_idx_mutex);
3495 	for (i = 0; i < se_num; i++) {
3496 		for (j = 0; j < sh_per_se; j++) {
3497 			cik_select_se_sh(rdev, i, j);
3498 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3499 			if (rdev->family == CHIP_HAWAII)
3500 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3501 			else
3502 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3503 		}
3504 	}
3505 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3506 	mutex_unlock(&rdev->grbm_idx_mutex);
3507 
3508 	mask = 1;
3509 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3510 		if (!(disabled_rbs & mask))
3511 			enabled_rbs |= mask;
3512 		mask <<= 1;
3513 	}
3514 
3515 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3516 
3517 	mutex_lock(&rdev->grbm_idx_mutex);
3518 	for (i = 0; i < se_num; i++) {
3519 		cik_select_se_sh(rdev, i, 0xffffffff);
3520 		data = 0;
3521 		for (j = 0; j < sh_per_se; j++) {
3522 			switch (enabled_rbs & 3) {
3523 			case 0:
3524 				if (j == 0)
3525 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3526 				else
3527 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3528 				break;
3529 			case 1:
3530 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3531 				break;
3532 			case 2:
3533 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3534 				break;
3535 			case 3:
3536 			default:
3537 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3538 				break;
3539 			}
3540 			enabled_rbs >>= 2;
3541 		}
3542 		WREG32(PA_SC_RASTER_CONFIG, data);
3543 	}
3544 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3545 	mutex_unlock(&rdev->grbm_idx_mutex);
3546 }
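/*
 * Worked example (hypothetical, nothing harvested): with se_num = 2,
 * sh_per_se = 1 and max_rb_num_per_se = 2, the loops above scan
 * 2 * 1 * 2 = 4 RBs; disabled_rbs == 0 then gives enabled_rbs = 0xf,
 * and each SE programs PA_SC_RASTER_CONFIG from its two-bit slices.
 */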
3547 
3548 /**
3549  * cik_gpu_init - setup the 3D engine
3550  *
3551  * @rdev: radeon_device pointer
3552  *
3553  * Configures the 3D engine and tiling configuration
3554  * registers so that the 3D engine is usable.
3555  */
3556 static void cik_gpu_init(struct radeon_device *rdev)
3557 {
3558 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3559 	u32 mc_shared_chmap, mc_arb_ramcfg;
3560 	u32 hdp_host_path_cntl;
3561 	u32 tmp;
3562 	int i, j;
3563 
3564 	switch (rdev->family) {
3565 	case CHIP_BONAIRE:
3566 		rdev->config.cik.max_shader_engines = 2;
3567 		rdev->config.cik.max_tile_pipes = 4;
3568 		rdev->config.cik.max_cu_per_sh = 7;
3569 		rdev->config.cik.max_sh_per_se = 1;
3570 		rdev->config.cik.max_backends_per_se = 2;
3571 		rdev->config.cik.max_texture_channel_caches = 4;
3572 		rdev->config.cik.max_gprs = 256;
3573 		rdev->config.cik.max_gs_threads = 32;
3574 		rdev->config.cik.max_hw_contexts = 8;
3575 
3576 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3577 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3578 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3579 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3580 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3581 		break;
3582 	case CHIP_HAWAII:
3583 		rdev->config.cik.max_shader_engines = 4;
3584 		rdev->config.cik.max_tile_pipes = 16;
3585 		rdev->config.cik.max_cu_per_sh = 11;
3586 		rdev->config.cik.max_sh_per_se = 1;
3587 		rdev->config.cik.max_backends_per_se = 4;
3588 		rdev->config.cik.max_texture_channel_caches = 16;
3589 		rdev->config.cik.max_gprs = 256;
3590 		rdev->config.cik.max_gs_threads = 32;
3591 		rdev->config.cik.max_hw_contexts = 8;
3592 
3593 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3594 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3595 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3596 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3597 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3598 		break;
3599 	case CHIP_KAVERI:
3600 		rdev->config.cik.max_shader_engines = 1;
3601 		rdev->config.cik.max_tile_pipes = 4;
3602 		if ((rdev->pdev->device == 0x1304) ||
3603 		    (rdev->pdev->device == 0x1305) ||
3604 		    (rdev->pdev->device == 0x130C) ||
3605 		    (rdev->pdev->device == 0x130F) ||
3606 		    (rdev->pdev->device == 0x1310) ||
3607 		    (rdev->pdev->device == 0x1311) ||
3608 		    (rdev->pdev->device == 0x131C)) {
3609 			rdev->config.cik.max_cu_per_sh = 8;
3610 			rdev->config.cik.max_backends_per_se = 2;
3611 		} else if ((rdev->pdev->device == 0x1309) ||
3612 			   (rdev->pdev->device == 0x130A) ||
3613 			   (rdev->pdev->device == 0x130D) ||
3614 			   (rdev->pdev->device == 0x1313) ||
3615 			   (rdev->pdev->device == 0x131D)) {
3616 			rdev->config.cik.max_cu_per_sh = 6;
3617 			rdev->config.cik.max_backends_per_se = 2;
3618 		} else if ((rdev->pdev->device == 0x1306) ||
3619 			   (rdev->pdev->device == 0x1307) ||
3620 			   (rdev->pdev->device == 0x130B) ||
3621 			   (rdev->pdev->device == 0x130E) ||
3622 			   (rdev->pdev->device == 0x1315) ||
3623 			   (rdev->pdev->device == 0x1318) ||
3624 			   (rdev->pdev->device == 0x131B)) {
3625 			rdev->config.cik.max_cu_per_sh = 4;
3626 			rdev->config.cik.max_backends_per_se = 1;
3627 		} else {
3628 			rdev->config.cik.max_cu_per_sh = 3;
3629 			rdev->config.cik.max_backends_per_se = 1;
3630 		}
3631 		rdev->config.cik.max_sh_per_se = 1;
3632 		rdev->config.cik.max_texture_channel_caches = 4;
3633 		rdev->config.cik.max_gprs = 256;
3634 		rdev->config.cik.max_gs_threads = 16;
3635 		rdev->config.cik.max_hw_contexts = 8;
3636 
3637 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3638 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3639 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3640 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3641 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3642 		break;
3643 	case CHIP_KABINI:
3644 	case CHIP_MULLINS:
3645 	default:
3646 		rdev->config.cik.max_shader_engines = 1;
3647 		rdev->config.cik.max_tile_pipes = 2;
3648 		rdev->config.cik.max_cu_per_sh = 2;
3649 		rdev->config.cik.max_sh_per_se = 1;
3650 		rdev->config.cik.max_backends_per_se = 1;
3651 		rdev->config.cik.max_texture_channel_caches = 2;
3652 		rdev->config.cik.max_gprs = 256;
3653 		rdev->config.cik.max_gs_threads = 16;
3654 		rdev->config.cik.max_hw_contexts = 8;
3655 
3656 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3657 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3658 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3659 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3660 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3661 		break;
3662 	}
3663 
3664 	/* Initialize HDP */
3665 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3666 		WREG32((0x2c14 + j), 0x00000000);
3667 		WREG32((0x2c18 + j), 0x00000000);
3668 		WREG32((0x2c1c + j), 0x00000000);
3669 		WREG32((0x2c20 + j), 0x00000000);
3670 		WREG32((0x2c24 + j), 0x00000000);
3671 	}
3672 
3673 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3674 	WREG32(SRBM_INT_CNTL, 0x1);
3675 	WREG32(SRBM_INT_ACK, 0x1);
3676 
3677 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3678 
3679 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3680 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3681 
3682 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3683 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3684 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3685 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3686 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3687 		rdev->config.cik.mem_row_size_in_kb = 4;
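	/*
	 * The NOOFCOLS field encodes the DRAM row size as 4 * 2^(8 + tmp)
	 * bytes: tmp = 0 -> 1 KB, tmp = 1 -> 2 KB, tmp = 2 -> 4 KB;
	 * anything larger is clamped to 4 KB above.
	 */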
3688 	/* XXX use MC settings? */
3689 	rdev->config.cik.shader_engine_tile_size = 32;
3690 	rdev->config.cik.num_gpus = 1;
3691 	rdev->config.cik.multi_gpu_tile_size = 64;
3692 
3693 	/* fix up row size */
3694 	gb_addr_config &= ~ROW_SIZE_MASK;
3695 	switch (rdev->config.cik.mem_row_size_in_kb) {
3696 	case 1:
3697 	default:
3698 		gb_addr_config |= ROW_SIZE(0);
3699 		break;
3700 	case 2:
3701 		gb_addr_config |= ROW_SIZE(1);
3702 		break;
3703 	case 4:
3704 		gb_addr_config |= ROW_SIZE(2);
3705 		break;
3706 	}
3707 
3708 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3709 	 * not have bank info, so create a custom tiling dword.
3710 	 * bits 3:0   num_pipes
3711 	 * bits 7:4   num_banks
3712 	 * bits 11:8  group_size
3713 	 * bits 15:12 row_size
3714 	 */
3715 	rdev->config.cik.tile_config = 0;
3716 	switch (rdev->config.cik.num_tile_pipes) {
3717 	case 1:
3718 		rdev->config.cik.tile_config |= (0 << 0);
3719 		break;
3720 	case 2:
3721 		rdev->config.cik.tile_config |= (1 << 0);
3722 		break;
3723 	case 4:
3724 		rdev->config.cik.tile_config |= (2 << 0);
3725 		break;
3726 	case 8:
3727 	default:
3728 		/* XXX what about 12? */
3729 		rdev->config.cik.tile_config |= (3 << 0);
3730 		break;
3731 	}
3732 	rdev->config.cik.tile_config |=
3733 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3734 	rdev->config.cik.tile_config |=
3735 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3736 	rdev->config.cik.tile_config |=
3737 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
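	/*
	 * Example decode (hypothetical field values): with 4 tile pipes
	 * and 4 KB rows, bits 3:0 = 2 and bits 15:12 = 2 (ROW_SIZE(2) per
	 * the switch above), while bits 7:4 and 11:8 carry the raw NOOFBANK
	 * and PIPE_INTERLEAVE_SIZE fields; tile_config = 0x2012 would thus
	 * report 4 pipes, NOOFBANK = 1, interleave field 0 and 4 KB rows.
	 */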
3738 
3739 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3740 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3741 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3742 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3743 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3744 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3745 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3746 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3747 
3748 	cik_tiling_mode_table_init(rdev);
3749 
3750 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3751 		     rdev->config.cik.max_sh_per_se,
3752 		     rdev->config.cik.max_backends_per_se);
3753 
3754 	rdev->config.cik.active_cus = 0;
3755 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3756 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3757 			rdev->config.cik.active_cus +=
3758 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3759 		}
3760 	}
3761 
3762 	/* set HW defaults for 3D engine */
3763 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3764 
3765 	mutex_lock(&rdev->grbm_idx_mutex);
3766 	/*
3767 	 * make sure that the following register writes are broadcast
3768 	 * to all the shaders
3769 	 */
3770 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3771 	WREG32(SX_DEBUG_1, 0x20);
3772 
3773 	WREG32(TA_CNTL_AUX, 0x00010000);
3774 
3775 	tmp = RREG32(SPI_CONFIG_CNTL);
3776 	tmp |= 0x03000000;
3777 	WREG32(SPI_CONFIG_CNTL, tmp);
3778 
3779 	WREG32(SQ_CONFIG, 1);
3780 
3781 	WREG32(DB_DEBUG, 0);
3782 
3783 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3784 	tmp |= 0x00000400;
3785 	WREG32(DB_DEBUG2, tmp);
3786 
3787 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3788 	tmp |= 0x00020200;
3789 	WREG32(DB_DEBUG3, tmp);
3790 
3791 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3792 	tmp |= 0x00018208;
3793 	WREG32(CB_HW_CONTROL, tmp);
3794 
3795 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3796 
3797 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3798 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3799 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3800 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3801 
3802 	WREG32(VGT_NUM_INSTANCES, 1);
3803 
3804 	WREG32(CP_PERFMON_CNTL, 0);
3805 
3806 	WREG32(SQ_CONFIG, 0);
3807 
3808 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3809 					  FORCE_EOV_MAX_REZ_CNT(255)));
3810 
3811 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3812 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3813 
3814 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3815 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3816 
3817 	tmp = RREG32(HDP_MISC_CNTL);
3818 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3819 	WREG32(HDP_MISC_CNTL, tmp);
3820 
3821 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3822 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3823 
3824 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3825 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3826 	mutex_unlock(&rdev->grbm_idx_mutex);
3827 
3828 	udelay(50);
3829 }
3830 
3831 /*
3832  * GPU scratch register helper functions.
3833  */
3834 /**
3835  * cik_scratch_init - setup driver info for CP scratch regs
3836  *
3837  * @rdev: radeon_device pointer
3838  *
3839  * Set up the number and offset of the CP scratch registers.
3840  * NOTE: use of CP scratch registers is a legacy interface and
3841  * is not used by default on newer asics (r6xx+).  On newer asics,
3842  * memory buffers are used for fences rather than scratch regs.
3843  */
3844 static void cik_scratch_init(struct radeon_device *rdev)
3845 {
3846 	int i;
3847 
3848 	rdev->scratch.num_reg = 7;
3849 	rdev->scratch.reg_base = SCRATCH_REG0;
3850 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3851 		rdev->scratch.free[i] = true;
3852 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3853 	}
3854 }
3855 
3856 /**
3857  * cik_ring_test - basic gfx ring test
3858  *
3859  * @rdev: radeon_device pointer
3860  * @ring: radeon_ring structure holding ring information
3861  *
3862  * Allocate a scratch register and write to it using the gfx ring (CIK).
3863  * Provides a basic gfx ring test to verify that the ring is working.
3864  * Used by cik_cp_gfx_resume().
3865  * Returns 0 on success, error on failure.
3866  */
3867 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3868 {
3869 	uint32_t scratch;
3870 	uint32_t tmp = 0;
3871 	unsigned i;
3872 	int r;
3873 
3874 	r = radeon_scratch_get(rdev, &scratch);
3875 	if (r) {
3876 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3877 		return r;
3878 	}
3879 	WREG32(scratch, 0xCAFEDEAD);
3880 	r = radeon_ring_lock(rdev, ring, 3);
3881 	if (r) {
3882 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3883 		radeon_scratch_free(rdev, scratch);
3884 		return r;
3885 	}
3886 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3887 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3888 	radeon_ring_write(ring, 0xDEADBEEF);
3889 	radeon_ring_unlock_commit(rdev, ring, false);
3890 
3891 	for (i = 0; i < rdev->usec_timeout; i++) {
3892 		tmp = RREG32(scratch);
3893 		if (tmp == 0xDEADBEEF)
3894 			break;
3895 		DRM_UDELAY(1);
3896 	}
3897 	if (i < rdev->usec_timeout) {
3898 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3899 	} else {
3900 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3901 			  ring->idx, scratch, tmp);
3902 		r = -EINVAL;
3903 	}
3904 	radeon_scratch_free(rdev, scratch);
3905 	return r;
3906 }
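/*
 * What the test exercises, in brief: the SET_UCONFIG_REG packet makes
 * the CP itself store 0xDEADBEEF into the scratch register, which the
 * CPU then polls back via RREG32().  On success dmesg shows a line of
 * the form (values vary): "ring test on 0 succeeded in 1 usecs".
 */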
3907 
3908 /**
3909  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3910  *
3911  * @rdev: radeon_device pointer
3912  * @ridx: radeon ring index
3913  *
3914  * Emits an hdp flush on the cp.
3915  */
3916 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3917 				       int ridx)
3918 {
3919 	struct radeon_ring *ring = &rdev->ring[ridx];
3920 	u32 ref_and_mask;
3921 
3922 	switch (ring->idx) {
3923 	case CAYMAN_RING_TYPE_CP1_INDEX:
3924 	case CAYMAN_RING_TYPE_CP2_INDEX:
3925 	default:
3926 		switch (ring->me) {
3927 		case 0:
3928 			ref_and_mask = CP2 << ring->pipe;
3929 			break;
3930 		case 1:
3931 			ref_and_mask = CP6 << ring->pipe;
3932 			break;
3933 		default:
3934 			return;
3935 		}
3936 		break;
3937 	case RADEON_RING_TYPE_GFX_INDEX:
3938 		ref_and_mask = CP0;
3939 		break;
3940 	}
3941 
3942 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3943 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3944 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3945 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3946 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3947 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3948 	radeon_ring_write(ring, ref_and_mask);
3949 	radeon_ring_write(ring, ref_and_mask);
3950 	radeon_ring_write(ring, 0x20); /* poll interval */
3951 }
3952 
3953 /**
3954  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3955  *
3956  * @rdev: radeon_device pointer
3957  * @fence: radeon fence object
3958  *
3959  * Emits a fence sequence number on the gfx ring and flushes
3960  * GPU caches.
3961  */
3962 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3963 			     struct radeon_fence *fence)
3964 {
3965 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3966 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3967 
3968 	/* Workaround for cache flush problems. First send a dummy EOP
3969 	 * event down the pipe with seq one below.
3970 	 */
3971 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3972 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3973 				 EOP_TC_ACTION_EN |
3974 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3975 				 EVENT_INDEX(5)));
3976 	radeon_ring_write(ring, addr & 0xfffffffc);
3977 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3978 				DATA_SEL(1) | INT_SEL(0));
3979 	radeon_ring_write(ring, fence->seq - 1);
3980 	radeon_ring_write(ring, 0);
3981 
3982 	/* Then send the real EOP event down the pipe. */
3983 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3984 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3985 				 EOP_TC_ACTION_EN |
3986 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3987 				 EVENT_INDEX(5)));
3988 	radeon_ring_write(ring, addr & 0xfffffffc);
3989 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3990 	radeon_ring_write(ring, fence->seq);
3991 	radeon_ring_write(ring, 0);
3992 }
3993 
3994 /**
3995  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3996  *
3997  * @rdev: radeon_device pointer
3998  * @fence: radeon fence object
3999  *
4000  * Emits a fence sequence number on the compute ring and flushes
4001  * GPU caches.
4002  */
4003 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
4004 				 struct radeon_fence *fence)
4005 {
4006 	struct radeon_ring *ring = &rdev->ring[fence->ring];
4007 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4008 
4009 	/* RELEASE_MEM - flush caches, send int */
4010 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4011 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
4012 				 EOP_TC_ACTION_EN |
4013 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4014 				 EVENT_INDEX(5)));
4015 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
4016 	radeon_ring_write(ring, addr & 0xfffffffc);
4017 	radeon_ring_write(ring, upper_32_bits(addr));
4018 	radeon_ring_write(ring, fence->seq);
4019 	radeon_ring_write(ring, 0);
4020 }
4021 
4022 /**
4023  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
4024  *
4025  * @rdev: radeon_device pointer
4026  * @ring: radeon ring buffer object
4027  * @semaphore: radeon semaphore object
4028  * @emit_wait: Is this a semaphore wait?
4029  *
4030  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4031  * from running ahead of semaphore waits.
4032  */
4033 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4034 			     struct radeon_ring *ring,
4035 			     struct radeon_semaphore *semaphore,
4036 			     bool emit_wait)
4037 {
4038 	uint64_t addr = semaphore->gpu_addr;
4039 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4040 
4041 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4042 	radeon_ring_write(ring, lower_32_bits(addr));
4043 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4044 
4045 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4046 		/* Prevent the PFP from running ahead of the semaphore wait */
4047 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4048 		radeon_ring_write(ring, 0x0);
4049 	}
4050 
4051 	return true;
4052 }
4053 
4054 /**
4055  * cik_copy_cpdma - copy pages using the CP DMA engine
4056  *
4057  * @rdev: radeon_device pointer
4058  * @src_offset: src GPU address
4059  * @dst_offset: dst GPU address
4060  * @num_gpu_pages: number of GPU pages to xfer
4061  * @resv: reservation object to sync to
4062  *
4063  * Copy GPU pages using the CP DMA engine (CIK+).
4064  * Used by the radeon ttm implementation to move pages if
4065  * registered as the asic copy callback.
4066  */
4067 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4068 				    uint64_t src_offset, uint64_t dst_offset,
4069 				    unsigned num_gpu_pages,
4070 				    struct reservation_object *resv)
4071 {
4072 	struct radeon_fence *fence;
4073 	struct radeon_sync sync;
4074 	int ring_index = rdev->asic->copy.blit_ring_index;
4075 	struct radeon_ring *ring = &rdev->ring[ring_index];
4076 	u32 size_in_bytes, cur_size_in_bytes, control;
4077 	int i, num_loops;
4078 	int r = 0;
4079 
4080 	radeon_sync_create(&sync);
4081 
4082 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4083 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4084 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4085 	if (r) {
4086 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4087 		radeon_sync_free(rdev, &sync, NULL);
4088 		return ERR_PTR(r);
4089 	}
4090 
4091 	radeon_sync_resv(rdev, &sync, resv, false);
4092 	radeon_sync_rings(rdev, &sync, ring->idx);
4093 
4094 	for (i = 0; i < num_loops; i++) {
4095 		cur_size_in_bytes = size_in_bytes;
4096 		if (cur_size_in_bytes > 0x1fffff)
4097 			cur_size_in_bytes = 0x1fffff;
4098 		size_in_bytes -= cur_size_in_bytes;
4099 		control = 0;
4100 		if (size_in_bytes == 0)
4101 			control |= PACKET3_DMA_DATA_CP_SYNC;
4102 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4103 		radeon_ring_write(ring, control);
4104 		radeon_ring_write(ring, lower_32_bits(src_offset));
4105 		radeon_ring_write(ring, upper_32_bits(src_offset));
4106 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4107 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4108 		radeon_ring_write(ring, cur_size_in_bytes);
4109 		src_offset += cur_size_in_bytes;
4110 		dst_offset += cur_size_in_bytes;
4111 	}
4112 
4113 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4114 	if (r) {
4115 		radeon_ring_unlock_undo(rdev, ring);
4116 		radeon_sync_free(rdev, &sync, NULL);
4117 		return ERR_PTR(r);
4118 	}
4119 
4120 	radeon_ring_unlock_commit(rdev, ring, false);
4121 	radeon_sync_free(rdev, &sync, fence);
4122 
4123 	return fence;
4124 }
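/*
 * Chunking note: PACKET3_DMA_DATA moves at most 0x1fffff bytes per
 * packet, hence the loop above.  For a hypothetical 8 MiB copy,
 * num_loops = DIV_ROUND_UP(8 << 20, 0x1fffff) = 5 packets of 7 dwords
 * each, well within the num_loops * 7 + 18 dwords reserved on the ring.
 */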
4125 
4126 /*
4127  * IB stuff
4128  */
4129 /**
4130  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4131  *
4132  * @rdev: radeon_device pointer
4133  * @ib: radeon indirect buffer object
4134  *
4135  * Emits a DE (drawing engine) or CE (constant engine) IB
4136  * on the gfx ring.  IBs are usually generated by userspace
4137  * acceleration drivers and submitted to the kernel for
4138  * scheduling on the ring.  This function schedules the IB
4139  * on the gfx ring for execution by the GPU.
4140  */
4141 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4142 {
4143 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4144 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4145 	u32 header, control = INDIRECT_BUFFER_VALID;
4146 
4147 	if (ib->is_const_ib) {
4148 		/* set switch buffer packet before const IB */
4149 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4150 		radeon_ring_write(ring, 0);
4151 
4152 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4153 	} else {
4154 		u32 next_rptr;
4155 		if (ring->rptr_save_reg) {
4156 			next_rptr = ring->wptr + 3 + 4;
4157 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4158 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4159 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4160 			radeon_ring_write(ring, next_rptr);
4161 		} else if (rdev->wb.enabled) {
4162 			next_rptr = ring->wptr + 5 + 4;
4163 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4164 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4165 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4166 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4167 			radeon_ring_write(ring, next_rptr);
4168 		}
4169 
4170 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4171 	}
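	/*
	 * next_rptr bookkeeping above: the "+ 3"/"+ 5" term is the size in
	 * dwords of the SET_UCONFIG_REG or WRITE_DATA packet just emitted,
	 * and "+ 4" is the IB packet itself (header, two address dwords,
	 * control) written at the end of this function.
	 */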
4172 
4173 	control |= ib->length_dw | (vm_id << 24);
4174 
4175 	radeon_ring_write(ring, header);
4176 	radeon_ring_write(ring,
4177 #ifdef __BIG_ENDIAN
4178 			  (2 << 0) |
4179 #endif
4180 			  (ib->gpu_addr & 0xFFFFFFFC));
4181 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4182 	radeon_ring_write(ring, control);
4183 }
4184 
4185 /**
4186  * cik_ib_test - basic gfx ring IB test
4187  *
4188  * @rdev: radeon_device pointer
4189  * @ring: radeon_ring structure holding ring information
4190  *
4191  * Allocate an IB and execute it on the gfx ring (CIK).
4192  * Provides a basic gfx ring test to verify that IBs are working.
4193  * Returns 0 on success, error on failure.
4194  */
4195 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4196 {
4197 	struct radeon_ib ib;
4198 	uint32_t scratch;
4199 	uint32_t tmp = 0;
4200 	unsigned i;
4201 	int r;
4202 
4203 	r = radeon_scratch_get(rdev, &scratch);
4204 	if (r) {
4205 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4206 		return r;
4207 	}
4208 	WREG32(scratch, 0xCAFEDEAD);
4209 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4210 	if (r) {
4211 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4212 		radeon_scratch_free(rdev, scratch);
4213 		return r;
4214 	}
4215 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4216 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4217 	ib.ptr[2] = 0xDEADBEEF;
4218 	ib.length_dw = 3;
4219 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4220 	if (r) {
4221 		radeon_scratch_free(rdev, scratch);
4222 		radeon_ib_free(rdev, &ib);
4223 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4224 		return r;
4225 	}
4226 	r = radeon_fence_wait(ib.fence, false);
4227 	if (r) {
4228 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4229 		radeon_scratch_free(rdev, scratch);
4230 		radeon_ib_free(rdev, &ib);
4231 		return r;
4232 	}
4233 	for (i = 0; i < rdev->usec_timeout; i++) {
4234 		tmp = RREG32(scratch);
4235 		if (tmp == 0xDEADBEEF)
4236 			break;
4237 		DRM_UDELAY(1);
4238 	}
4239 	if (i < rdev->usec_timeout) {
4240 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4241 	} else {
4242 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4243 			  scratch, tmp);
4244 		r = -EINVAL;
4245 	}
4246 	radeon_scratch_free(rdev, scratch);
4247 	radeon_ib_free(rdev, &ib);
4248 	return r;
4249 }
4250 
4251 /*
4252  * CP.
4253  * On CIK, gfx and compute now have independent command processors.
4254  *
4255  * GFX
4256  * Gfx consists of a single ring and can process both gfx jobs and
4257  * compute jobs.  The gfx CP consists of three microengines (ME):
4258  * PFP - Pre-Fetch Parser
4259  * ME - Micro Engine
4260  * CE - Constant Engine
4261  * The PFP and ME make up what is considered the Drawing Engine (DE).
4262  * The CE is an asynchronous engine used for updating buffer descriptors
4263  * used by the DE so that they can be loaded into cache in parallel
4264  * while the DE is processing state update packets.
4265  *
4266  * Compute
4267  * The compute CP consists of two microengines (ME):
4268  * MEC1 - Compute MicroEngine 1
4269  * MEC2 - Compute MicroEngine 2
4270  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4271  * The queues are exposed to userspace and are programmed directly
4272  * by the compute runtime.
4273  */
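/*
 * Queue math sketch: total compute queues = num_mec * 4 pipes *
 * 8 queues, so a 2-MEC part (KV) exposes 2 * 4 * 8 = 64 queues and a
 * 1-MEC part (CI/KB) exposes 32, matching the table in cik_mec_init().
 */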
4274 /**
4275  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4276  *
4277  * @rdev: radeon_device pointer
4278  * @enable: enable or disable the MEs
4279  *
4280  * Halts or unhalts the gfx MEs.
4281  */
4282 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4283 {
4284 	if (enable)
4285 		WREG32(CP_ME_CNTL, 0);
4286 	else {
4287 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4288 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4289 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4290 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4291 	}
4292 	udelay(50);
4293 }
4294 
4295 /**
4296  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4297  *
4298  * @rdev: radeon_device pointer
4299  *
4300  * Loads the gfx PFP, ME, and CE ucode.
4301  * Returns 0 for success, -EINVAL if the ucode is not available.
4302  */
4303 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4304 {
4305 	int i;
4306 
4307 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4308 		return -EINVAL;
4309 
4310 	cik_cp_gfx_enable(rdev, false);
4311 
4312 	if (rdev->new_fw) {
4313 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4314 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4315 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4316 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4317 		const struct gfx_firmware_header_v1_0 *me_hdr =
4318 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4319 		const __le32 *fw_data;
4320 		u32 fw_size;
4321 
4322 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4323 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4324 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4325 
4326 		/* PFP */
4327 		fw_data = (const __le32 *)
4328 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4329 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4330 		WREG32(CP_PFP_UCODE_ADDR, 0);
4331 		for (i = 0; i < fw_size; i++)
4332 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4333 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4334 
4335 		/* CE */
4336 		fw_data = (const __le32 *)
4337 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4338 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4339 		WREG32(CP_CE_UCODE_ADDR, 0);
4340 		for (i = 0; i < fw_size; i++)
4341 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4342 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4343 
4344 		/* ME */
4345 		fw_data = (const __le32 *)
4346 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4347 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4348 		WREG32(CP_ME_RAM_WADDR, 0);
4349 		for (i = 0; i < fw_size; i++)
4350 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4351 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4352 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4353 	} else {
4354 		const __be32 *fw_data;
4355 
4356 		/* PFP */
4357 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4358 		WREG32(CP_PFP_UCODE_ADDR, 0);
4359 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4360 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4361 		WREG32(CP_PFP_UCODE_ADDR, 0);
4362 
4363 		/* CE */
4364 		fw_data = (const __be32 *)rdev->ce_fw->data;
4365 		WREG32(CP_CE_UCODE_ADDR, 0);
4366 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4367 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4368 		WREG32(CP_CE_UCODE_ADDR, 0);
4369 
4370 		/* ME */
4371 		fw_data = (const __be32 *)rdev->me_fw->data;
4372 		WREG32(CP_ME_RAM_WADDR, 0);
4373 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4374 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4375 		WREG32(CP_ME_RAM_WADDR, 0);
4376 	}
4377 
4378 	return 0;
4379 }
4380 
4381 /**
4382  * cik_cp_gfx_start - start the gfx ring
4383  *
4384  * @rdev: radeon_device pointer
4385  *
4386  * Enables the ring and loads the clear state context and other
4387  * packets required to init the ring.
4388  * Returns 0 for success, error for failure.
4389  */
4390 static int cik_cp_gfx_start(struct radeon_device *rdev)
4391 {
4392 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4393 	int r, i;
4394 
4395 	/* init the CP */
4396 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4397 	WREG32(CP_ENDIAN_SWAP, 0);
4398 	WREG32(CP_DEVICE_ID, 1);
4399 
4400 	cik_cp_gfx_enable(rdev, true);
4401 
4402 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4403 	if (r) {
4404 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4405 		return r;
4406 	}
4407 
4408 	/* init the CE partitions.  CE only used for gfx on CIK */
4409 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4410 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4411 	radeon_ring_write(ring, 0x8000);
4412 	radeon_ring_write(ring, 0x8000);
4413 
4414 	/* setup clear context state */
4415 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4416 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4417 
4418 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4419 	radeon_ring_write(ring, 0x80000000);
4420 	radeon_ring_write(ring, 0x80000000);
4421 
4422 	for (i = 0; i < cik_default_size; i++)
4423 		radeon_ring_write(ring, cik_default_state[i]);
4424 
4425 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4426 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4427 
4428 	/* set clear context state */
4429 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4430 	radeon_ring_write(ring, 0);
4431 
4432 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4433 	radeon_ring_write(ring, 0x00000316);
4434 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4435 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4436 
4437 	radeon_ring_unlock_commit(rdev, ring, false);
4438 
4439 	return 0;
4440 }
4441 
4442 /**
4443  * cik_cp_gfx_fini - stop the gfx ring
4444  *
4445  * @rdev: radeon_device pointer
4446  *
4447  * Stop the gfx ring and tear down the driver ring
4448  * info.
4449  */
4450 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4451 {
4452 	cik_cp_gfx_enable(rdev, false);
4453 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4454 }
4455 
4456 /**
4457  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4458  *
4459  * @rdev: radeon_device pointer
4460  *
4461  * Program the location and size of the gfx ring buffer
4462  * and test it to make sure it's working.
4463  * Returns 0 for success, error for failure.
4464  */
4465 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4466 {
4467 	struct radeon_ring *ring;
4468 	u32 tmp;
4469 	u32 rb_bufsz;
4470 	u64 rb_addr;
4471 	int r;
4472 
4473 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4474 	if (rdev->family != CHIP_HAWAII)
4475 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4476 
4477 	/* Set the write pointer delay */
4478 	WREG32(CP_RB_WPTR_DELAY, 0);
4479 
4480 	/* set the RB to use vmid 0 */
4481 	WREG32(CP_RB_VMID, 0);
4482 
4483 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4484 
4485 	/* ring 0 - compute and gfx */
4486 	/* Set ring buffer size */
4487 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4488 	rb_bufsz = order_base_2(ring->ring_size / 8);
4489 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
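	/*
	 * Example (hypothetical 1 MiB ring): ring_size / 8 = 131072 = 2^17,
	 * so rb_bufsz = 17; the CNTL field holds log2 of the ring size in
	 * 8-byte units.
	 */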
4490 #ifdef __BIG_ENDIAN
4491 	tmp |= BUF_SWAP_32BIT;
4492 #endif
4493 	WREG32(CP_RB0_CNTL, tmp);
4494 
4495 	/* Initialize the ring buffer's read and write pointers */
4496 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4497 	ring->wptr = 0;
4498 	WREG32(CP_RB0_WPTR, ring->wptr);
4499 
4500 	/* set the wb address whether it's enabled or not */
4501 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4502 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4503 
4504 	/* scratch register shadowing is no longer supported */
4505 	WREG32(SCRATCH_UMSK, 0);
4506 
4507 	if (!rdev->wb.enabled)
4508 		tmp |= RB_NO_UPDATE;
4509 
4510 	mdelay(1);
4511 	WREG32(CP_RB0_CNTL, tmp);
4512 
4513 	rb_addr = ring->gpu_addr >> 8;
4514 	WREG32(CP_RB0_BASE, rb_addr);
4515 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4516 
4517 	/* start the ring */
4518 	cik_cp_gfx_start(rdev);
4519 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4520 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4521 	if (r) {
4522 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4523 		return r;
4524 	}
4525 
4526 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4527 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4528 
4529 	return 0;
4530 }
4531 
4532 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4533 		     struct radeon_ring *ring)
4534 {
4535 	u32 rptr;
4536 
4537 	if (rdev->wb.enabled)
4538 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4539 	else
4540 		rptr = RREG32(CP_RB0_RPTR);
4541 
4542 	return rptr;
4543 }
4544 
4545 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4546 		     struct radeon_ring *ring)
4547 {
4548 	u32 wptr;
4549 
4550 	wptr = RREG32(CP_RB0_WPTR);
4551 
4552 	return wptr;
4553 }
4554 
4555 void cik_gfx_set_wptr(struct radeon_device *rdev,
4556 		      struct radeon_ring *ring)
4557 {
4558 	WREG32(CP_RB0_WPTR, ring->wptr);
4559 	(void)RREG32(CP_RB0_WPTR);
4560 }
4561 
4562 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4563 			 struct radeon_ring *ring)
4564 {
4565 	u32 rptr;
4566 
4567 	if (rdev->wb.enabled) {
4568 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4569 	} else {
4570 		mutex_lock(&rdev->srbm_mutex);
4571 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4572 		rptr = RREG32(CP_HQD_PQ_RPTR);
4573 		cik_srbm_select(rdev, 0, 0, 0, 0);
4574 		mutex_unlock(&rdev->srbm_mutex);
4575 	}
4576 
4577 	return rptr;
4578 }
4579 
4580 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4581 			 struct radeon_ring *ring)
4582 {
4583 	u32 wptr;
4584 
4585 	if (rdev->wb.enabled) {
4586 		/* XXX check if swapping is necessary on BE */
4587 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4588 	} else {
4589 		mutex_lock(&rdev->srbm_mutex);
4590 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4591 		wptr = RREG32(CP_HQD_PQ_WPTR);
4592 		cik_srbm_select(rdev, 0, 0, 0, 0);
4593 		mutex_unlock(&rdev->srbm_mutex);
4594 	}
4595 
4596 	return wptr;
4597 }
4598 
4599 void cik_compute_set_wptr(struct radeon_device *rdev,
4600 			  struct radeon_ring *ring)
4601 {
4602 	/* XXX check if swapping is necessary on BE */
4603 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4604 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4605 }
4606 
4607 static void cik_compute_stop(struct radeon_device *rdev,
4608 			     struct radeon_ring *ring)
4609 {
4610 	u32 j, tmp;
4611 
4612 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4613 	/* Disable wptr polling. */
4614 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4615 	tmp &= ~WPTR_POLL_EN;
4616 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4617 	/* Disable HQD. */
4618 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4619 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4620 		for (j = 0; j < rdev->usec_timeout; j++) {
4621 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4622 				break;
4623 			udelay(1);
4624 		}
4625 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4626 		WREG32(CP_HQD_PQ_RPTR, 0);
4627 		WREG32(CP_HQD_PQ_WPTR, 0);
4628 	}
4629 	cik_srbm_select(rdev, 0, 0, 0, 0);
4630 }
4631 
4632 /**
4633  * cik_cp_compute_enable - enable/disable the compute CP MEs
4634  *
4635  * @rdev: radeon_device pointer
4636  * @enable: enable or disable the MEs
4637  *
4638  * Halts or unhalts the compute MEs.
4639  */
4640 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4641 {
4642 	if (enable)
4643 		WREG32(CP_MEC_CNTL, 0);
4644 	else {
4645 		/*
4646 		 * To make hibernation reliable we need to clear compute ring
4647 		 * configuration before halting the compute ring.
4648 		 */
4649 		mutex_lock(&rdev->srbm_mutex);
4650 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4651 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4652 		mutex_unlock(&rdev->srbm_mutex);
4653 
4654 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4655 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4656 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4657 	}
4658 	udelay(50);
4659 }
4660 
4661 /**
4662  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4663  *
4664  * @rdev: radeon_device pointer
4665  *
4666  * Loads the compute MEC1&2 ucode.
4667  * Returns 0 for success, -EINVAL if the ucode is not available.
4668  */
4669 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4670 {
4671 	int i;
4672 
4673 	if (!rdev->mec_fw)
4674 		return -EINVAL;
4675 
4676 	cik_cp_compute_enable(rdev, false);
4677 
4678 	if (rdev->new_fw) {
4679 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4680 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4681 		const __le32 *fw_data;
4682 		u32 fw_size;
4683 
4684 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4685 
4686 		/* MEC1 */
4687 		fw_data = (const __le32 *)
4688 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4689 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4690 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4691 		for (i = 0; i < fw_size; i++)
4692 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4693 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4694 
4695 		/* MEC2 */
4696 		if (rdev->family == CHIP_KAVERI) {
4697 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4698 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4699 
4700 			fw_data = (const __le32 *)
4701 				(rdev->mec2_fw->data +
4702 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4703 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4704 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4705 			for (i = 0; i < fw_size; i++)
4706 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4707 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4708 		}
4709 	} else {
4710 		const __be32 *fw_data;
4711 
4712 		/* MEC1 */
4713 		fw_data = (const __be32 *)rdev->mec_fw->data;
4714 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4715 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4716 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4717 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4718 
4719 		if (rdev->family == CHIP_KAVERI) {
4720 			/* MEC2 */
4721 			fw_data = (const __be32 *)rdev->mec_fw->data;
4722 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4723 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4724 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4725 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4726 		}
4727 	}
4728 
4729 	return 0;
4730 }
4731 
4732 /**
4733  * cik_cp_compute_start - start the compute queues
4734  *
4735  * @rdev: radeon_device pointer
4736  *
4737  * Enable the compute queues.
4738  * Returns 0 for success, error for failure.
4739  */
4740 static int cik_cp_compute_start(struct radeon_device *rdev)
4741 {
4742 	cik_cp_compute_enable(rdev, true);
4743 
4744 	return 0;
4745 }
4746 
4747 /**
4748  * cik_cp_compute_fini - stop the compute queues
4749  *
4750  * @rdev: radeon_device pointer
4751  *
4752  * Stop the compute queues and tear down the driver queue
4753  * info.
4754  */
4755 static void cik_cp_compute_fini(struct radeon_device *rdev)
4756 {
4757 	int i, idx, r;
4758 
4759 	cik_cp_compute_enable(rdev, false);
4760 
4761 	for (i = 0; i < 2; i++) {
4762 		if (i == 0)
4763 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4764 		else
4765 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4766 
4767 		if (rdev->ring[idx].mqd_obj) {
4768 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4769 			if (unlikely(r != 0))
4770 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4771 
4772 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4773 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4774 
4775 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4776 			rdev->ring[idx].mqd_obj = NULL;
4777 		}
4778 	}
4779 }
4780 
4781 static void cik_mec_fini(struct radeon_device *rdev)
4782 {
4783 	int r;
4784 
4785 	if (rdev->mec.hpd_eop_obj) {
4786 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4787 		if (unlikely(r != 0))
4788 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4789 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4790 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4791 
4792 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4793 		rdev->mec.hpd_eop_obj = NULL;
4794 	}
4795 }
4796 
4797 #define MEC_HPD_SIZE 2048
4798 
4799 static int cik_mec_init(struct radeon_device *rdev)
4800 {
4801 	int r;
4802 	u32 *hpd;
4803 
4804 	/*
4805 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4806 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4807 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4808 	 * be handled by KFD
4809 	 */
4810 	rdev->mec.num_mec = 1;
4811 	rdev->mec.num_pipe = 1;
4812 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4813 
4814 	if (rdev->mec.hpd_eop_obj == NULL) {
4815 		r = radeon_bo_create(rdev,
4816 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4817 				     PAGE_SIZE, true,
4818 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4819 				     &rdev->mec.hpd_eop_obj);
4820 		if (r) {
4821 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4822 			return r;
4823 		}
4824 	}
4825 
4826 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4827 	if (unlikely(r != 0)) {
4828 		cik_mec_fini(rdev);
4829 		return r;
4830 	}
4831 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4832 			  &rdev->mec.hpd_eop_gpu_addr);
4833 	if (r) {
4834 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4835 		cik_mec_fini(rdev);
4836 		return r;
4837 	}
4838 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4839 	if (r) {
4840 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4841 		cik_mec_fini(rdev);
4842 		return r;
4843 	}
4844 
4845 	/* clear memory.  Not sure if this is required or not */
4846 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4847 
4848 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4849 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4850 
4851 	return 0;
4852 }
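/*
 * Size note: with num_mec = num_pipe = 1 as set above, the HPD EOP
 * buffer is 1 * 1 * MEC_HPD_SIZE * 2 = 4096 bytes, i.e. a single
 * 4 KiB page.
 */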
4853 
4854 struct hqd_registers {
4856 	u32 cp_mqd_base_addr;
4857 	u32 cp_mqd_base_addr_hi;
4858 	u32 cp_hqd_active;
4859 	u32 cp_hqd_vmid;
4860 	u32 cp_hqd_persistent_state;
4861 	u32 cp_hqd_pipe_priority;
4862 	u32 cp_hqd_queue_priority;
4863 	u32 cp_hqd_quantum;
4864 	u32 cp_hqd_pq_base;
4865 	u32 cp_hqd_pq_base_hi;
4866 	u32 cp_hqd_pq_rptr;
4867 	u32 cp_hqd_pq_rptr_report_addr;
4868 	u32 cp_hqd_pq_rptr_report_addr_hi;
4869 	u32 cp_hqd_pq_wptr_poll_addr;
4870 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4871 	u32 cp_hqd_pq_doorbell_control;
4872 	u32 cp_hqd_pq_wptr;
4873 	u32 cp_hqd_pq_control;
4874 	u32 cp_hqd_ib_base_addr;
4875 	u32 cp_hqd_ib_base_addr_hi;
4876 	u32 cp_hqd_ib_rptr;
4877 	u32 cp_hqd_ib_control;
4878 	u32 cp_hqd_iq_timer;
4879 	u32 cp_hqd_iq_rptr;
4880 	u32 cp_hqd_dequeue_request;
4881 	u32 cp_hqd_dma_offload;
4882 	u32 cp_hqd_sema_cmd;
4883 	u32 cp_hqd_msg_type;
4884 	u32 cp_hqd_atomic0_preop_lo;
4885 	u32 cp_hqd_atomic0_preop_hi;
4886 	u32 cp_hqd_atomic1_preop_lo;
4887 	u32 cp_hqd_atomic1_preop_hi;
4888 	u32 cp_hqd_hq_scheduler0;
4889 	u32 cp_hqd_hq_scheduler1;
4890 	u32 cp_mqd_control;
4891 };
4892 
4893 struct bonaire_mqd {
4895 	u32 header;
4896 	u32 dispatch_initiator;
4897 	u32 dimensions[3];
4898 	u32 start_idx[3];
4899 	u32 num_threads[3];
4900 	u32 pipeline_stat_enable;
4901 	u32 perf_counter_enable;
4902 	u32 pgm[2];
4903 	u32 tba[2];
4904 	u32 tma[2];
4905 	u32 pgm_rsrc[2];
4906 	u32 vmid;
4907 	u32 resource_limits;
4908 	u32 static_thread_mgmt01[2];
4909 	u32 tmp_ring_size;
4910 	u32 static_thread_mgmt23[2];
4911 	u32 restart[3];
4912 	u32 thread_trace_enable;
4913 	u32 reserved1;
4914 	u32 user_data[16];
4915 	u32 vgtcs_invoke_count[2];
4916 	struct hqd_registers queue_state;
4917 	u32 dequeue_cntr;
4918 	u32 interrupt_queue[64];
4919 };
4920 
4921 /**
4922  * cik_cp_compute_resume - setup the compute queue registers
4923  *
4924  * @rdev: radeon_device pointer
4925  *
4926  * Program the compute queues and test them to make sure they
4927  * are working.
4928  * Returns 0 for success, error for failure.
4929  */
4930 static int cik_cp_compute_resume(struct radeon_device *rdev)
4931 {
4932 	int r, i, j, idx;
4933 	u32 tmp;
4934 	bool use_doorbell = true;
4935 	u64 hqd_gpu_addr;
4936 	u64 mqd_gpu_addr;
4937 	u64 eop_gpu_addr;
4938 	u64 wb_gpu_addr;
4939 	u32 *buf;
4940 	struct bonaire_mqd *mqd;
4941 
4942 	r = cik_cp_compute_start(rdev);
4943 	if (r)
4944 		return r;
4945 
4946 	/* fix up chicken bits */
4947 	tmp = RREG32(CP_CPF_DEBUG);
4948 	tmp |= (1 << 23);
4949 	WREG32(CP_CPF_DEBUG, tmp);
4950 
4951 	/* init the pipes */
4952 	mutex_lock(&rdev->srbm_mutex);
4953 
4954 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4955 
4956 	cik_srbm_select(rdev, 0, 0, 0, 0);
4957 
4958 	/* write the EOP addr */
4959 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4960 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4961 
4962 	/* set the VMID assigned */
4963 	WREG32(CP_HPD_EOP_VMID, 0);
4964 
4965 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4966 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4967 	tmp &= ~EOP_SIZE_MASK;
4968 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4969 	WREG32(CP_HPD_EOP_CONTROL, tmp);
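
	/*
	 * Worked example (hedged: assuming MEC_HPD_SIZE is 2048 bytes):
	 * 2048 / 8 = 256 and order_base_2(256) = 8, so the field encodes
	 * 2^(8+1) = 512 dwords = 2048 bytes, i.e. one HPD-sized EOP ring
	 * per pipe.
	 */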
4970 
4971 	mutex_unlock(&rdev->srbm_mutex);
4972 
4973 	/* init the queues.  Just two for now. */
4974 	for (i = 0; i < 2; i++) {
4975 		if (i == 0)
4976 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4977 		else
4978 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4979 
4980 		if (rdev->ring[idx].mqd_obj == NULL) {
4981 			r = radeon_bo_create(rdev,
4982 					     sizeof(struct bonaire_mqd),
4983 					     PAGE_SIZE, true,
4984 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4985 					     NULL, &rdev->ring[idx].mqd_obj);
4986 			if (r) {
4987 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4988 				return r;
4989 			}
4990 		}
4991 
4992 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4993 		if (unlikely(r != 0)) {
4994 			cik_cp_compute_fini(rdev);
4995 			return r;
4996 		}
4997 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4998 				  &mqd_gpu_addr);
4999 		if (r) {
5000 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
5001 			cik_cp_compute_fini(rdev);
5002 			return r;
5003 		}
5004 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
5005 		if (r) {
5006 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
5007 			cik_cp_compute_fini(rdev);
5008 			return r;
5009 		}
5010 
5011 		/* init the mqd struct */
5012 		memset(buf, 0, sizeof(struct bonaire_mqd));
5013 
5014 		mqd = (struct bonaire_mqd *)buf;
5015 		mqd->header = 0xC0310800;
5016 		mqd->static_thread_mgmt01[0] = 0xffffffff;
5017 		mqd->static_thread_mgmt01[1] = 0xffffffff;
5018 		mqd->static_thread_mgmt23[0] = 0xffffffff;
5019 		mqd->static_thread_mgmt23[1] = 0xffffffff;
5020 
5021 		mutex_lock(&rdev->srbm_mutex);
5022 		cik_srbm_select(rdev, rdev->ring[idx].me,
5023 				rdev->ring[idx].pipe,
5024 				rdev->ring[idx].queue, 0);
5025 
5026 		/* disable wptr polling */
5027 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
5028 		tmp &= ~WPTR_POLL_EN;
5029 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
5030 
5031 		/* enable doorbell? */
5032 		mqd->queue_state.cp_hqd_pq_doorbell_control =
5033 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5034 		if (use_doorbell)
5035 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5036 		else
5037 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
5038 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5039 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5040 
5041 		/* disable the queue if it's active */
5042 		mqd->queue_state.cp_hqd_dequeue_request = 0;
5043 		mqd->queue_state.cp_hqd_pq_rptr = 0;
5044 		mqd->queue_state.cp_hqd_pq_wptr = 0;
5045 		if (RREG32(CP_HQD_ACTIVE) & 1) {
5046 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
5047 			for (j = 0; j < rdev->usec_timeout; j++) {
5048 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
5049 					break;
5050 				udelay(1);
5051 			}
5052 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
5053 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
5054 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5055 		}
5056 
5057 		/* set the pointer to the MQD */
5058 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5059 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5060 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5061 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5062 		/* set MQD vmid to 0 */
5063 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5064 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5065 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5066 
5067 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5068 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5069 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5070 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5071 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5072 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5073 
5074 		/* set up the HQD, this is similar to CP_RB0_CNTL */
5075 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5076 		mqd->queue_state.cp_hqd_pq_control &=
5077 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5078 
5079 		mqd->queue_state.cp_hqd_pq_control |=
5080 			order_base_2(rdev->ring[idx].ring_size / 8);
5081 		mqd->queue_state.cp_hqd_pq_control |=
5082 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5083 #ifdef __BIG_ENDIAN
5084 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5085 #endif
5086 		mqd->queue_state.cp_hqd_pq_control &=
5087 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5088 		mqd->queue_state.cp_hqd_pq_control |=
5089 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5090 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
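
		/*
		 * Worked example (hypothetical 64 KB ring): 65536 / 8 = 8192,
		 * order_base_2(8192) = 13, so the queue-size field encodes
		 * 2^(13+1) = 16384 dwords = 64 KB, the same 2^(n+1)-dwords
		 * convention used for the EOP ring above.  The second term,
		 * order_base_2(RADEON_GPU_PAGE_SIZE / 8) = order_base_2(512)
		 * = 9, shifted left by 8, lands in the RPTR_BLOCK_SIZE field
		 * cleared above.
		 */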
5091 
5092 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5093 		if (i == 0)
5094 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5095 		else
5096 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5097 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5098 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5099 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5100 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5101 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5102 
5103 		/* set the wb address whether it's enabled or not */
5104 		if (i == 0)
5105 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5106 		else
5107 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5108 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5109 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5110 			upper_32_bits(wb_gpu_addr) & 0xffff;
5111 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5112 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5113 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5114 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5115 
5116 		/* enable the doorbell if requested */
5117 		if (use_doorbell) {
5118 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5119 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5120 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5121 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5122 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5123 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5124 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5125 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5126 
5127 		} else {
5128 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5129 		}
5130 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5131 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5132 
5133 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5134 		rdev->ring[idx].wptr = 0;
5135 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5136 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5137 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5138 
5139 		/* set the vmid for the queue */
5140 		mqd->queue_state.cp_hqd_vmid = 0;
5141 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5142 
5143 		/* activate the queue */
5144 		mqd->queue_state.cp_hqd_active = 1;
5145 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5146 
5147 		cik_srbm_select(rdev, 0, 0, 0, 0);
5148 		mutex_unlock(&rdev->srbm_mutex);
5149 
5150 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5151 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5152 
5153 		rdev->ring[idx].ready = true;
5154 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5155 		if (r)
5156 			rdev->ring[idx].ready = false;
5157 	}
5158 
5159 	return 0;
5160 }
5161 
5162 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5163 {
5164 	cik_cp_gfx_enable(rdev, enable);
5165 	cik_cp_compute_enable(rdev, enable);
5166 }
5167 
5168 static int cik_cp_load_microcode(struct radeon_device *rdev)
5169 {
5170 	int r;
5171 
5172 	r = cik_cp_gfx_load_microcode(rdev);
5173 	if (r)
5174 		return r;
5175 	r = cik_cp_compute_load_microcode(rdev);
5176 	if (r)
5177 		return r;
5178 
5179 	return 0;
5180 }
5181 
5182 static void cik_cp_fini(struct radeon_device *rdev)
5183 {
5184 	cik_cp_gfx_fini(rdev);
5185 	cik_cp_compute_fini(rdev);
5186 }
5187 
5188 static int cik_cp_resume(struct radeon_device *rdev)
5189 {
5190 	int r;
5191 
5192 	cik_enable_gui_idle_interrupt(rdev, false);
5193 
5194 	r = cik_cp_load_microcode(rdev);
5195 	if (r)
5196 		return r;
5197 
5198 	r = cik_cp_gfx_resume(rdev);
5199 	if (r)
5200 		return r;
5201 	r = cik_cp_compute_resume(rdev);
5202 	if (r)
5203 		return r;
5204 
5205 	cik_enable_gui_idle_interrupt(rdev, true);
5206 
5207 	return 0;
5208 }
5209 
5210 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5211 {
5212 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5213 		RREG32(GRBM_STATUS));
5214 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5215 		RREG32(GRBM_STATUS2));
5216 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5217 		RREG32(GRBM_STATUS_SE0));
5218 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5219 		RREG32(GRBM_STATUS_SE1));
5220 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5221 		RREG32(GRBM_STATUS_SE2));
5222 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5223 		RREG32(GRBM_STATUS_SE3));
5224 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5225 		RREG32(SRBM_STATUS));
5226 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5227 		RREG32(SRBM_STATUS2));
5228 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5229 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5230 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5231 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5232 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5233 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5234 		 RREG32(CP_STALLED_STAT1));
5235 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5236 		 RREG32(CP_STALLED_STAT2));
5237 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5238 		 RREG32(CP_STALLED_STAT3));
5239 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5240 		 RREG32(CP_CPF_BUSY_STAT));
5241 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5242 		 RREG32(CP_CPF_STALLED_STAT1));
5243 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5244 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5245 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5246 		 RREG32(CP_CPC_STALLED_STAT1));
5247 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5248 }
5249 
5250 /**
5251  * cik_gpu_check_soft_reset - check which blocks are busy
5252  *
5253  * @rdev: radeon_device pointer
5254  *
5255  * Check which blocks are busy and return the relevant reset
5256  * mask to be used by cik_gpu_soft_reset().
5257  * Returns a mask of the blocks to be reset.
5258  */
5259 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5260 {
5261 	u32 reset_mask = 0;
5262 	u32 tmp;
5263 
5264 	/* GRBM_STATUS */
5265 	tmp = RREG32(GRBM_STATUS);
5266 	if (tmp & (PA_BUSY | SC_BUSY |
5267 		   BCI_BUSY | SX_BUSY |
5268 		   TA_BUSY | VGT_BUSY |
5269 		   DB_BUSY | CB_BUSY |
5270 		   GDS_BUSY | SPI_BUSY |
5271 		   IA_BUSY | IA_BUSY_NO_DMA))
5272 		reset_mask |= RADEON_RESET_GFX;
5273 
5274 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5275 		reset_mask |= RADEON_RESET_CP;
5276 
5277 	/* GRBM_STATUS2 */
5278 	tmp = RREG32(GRBM_STATUS2);
5279 	if (tmp & RLC_BUSY)
5280 		reset_mask |= RADEON_RESET_RLC;
5281 
5282 	/* SDMA0_STATUS_REG */
5283 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5284 	if (!(tmp & SDMA_IDLE))
5285 		reset_mask |= RADEON_RESET_DMA;
5286 
5287 	/* SDMA1_STATUS_REG */
5288 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5289 	if (!(tmp & SDMA_IDLE))
5290 		reset_mask |= RADEON_RESET_DMA1;
5291 
5292 	/* SRBM_STATUS2 */
5293 	tmp = RREG32(SRBM_STATUS2);
5294 	if (tmp & SDMA_BUSY)
5295 		reset_mask |= RADEON_RESET_DMA;
5296 
5297 	if (tmp & SDMA1_BUSY)
5298 		reset_mask |= RADEON_RESET_DMA1;
5299 
5300 	/* SRBM_STATUS */
5301 	tmp = RREG32(SRBM_STATUS);
5302 
5303 	if (tmp & IH_BUSY)
5304 		reset_mask |= RADEON_RESET_IH;
5305 
5306 	if (tmp & SEM_BUSY)
5307 		reset_mask |= RADEON_RESET_SEM;
5308 
5309 	if (tmp & GRBM_RQ_PENDING)
5310 		reset_mask |= RADEON_RESET_GRBM;
5311 
5312 	if (tmp & VMC_BUSY)
5313 		reset_mask |= RADEON_RESET_VMC;
5314 
5315 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5316 		   MCC_BUSY | MCD_BUSY))
5317 		reset_mask |= RADEON_RESET_MC;
5318 
5319 	if (evergreen_is_display_hung(rdev))
5320 		reset_mask |= RADEON_RESET_DISPLAY;
5321 
5322 	/* Skip MC reset as it's most likely not hung, just busy */
5323 	if (reset_mask & RADEON_RESET_MC) {
5324 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5325 		reset_mask &= ~RADEON_RESET_MC;
5326 	}
5327 
5328 	return reset_mask;
5329 }
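
/*
 * Usage sketch (illustrative): callers treat the returned mask as a
 * per-block busy/hung summary, e.g.:
 *
 *	u32 mask = cik_gpu_check_soft_reset(rdev);
 *
 *	if (mask & (RADEON_RESET_GFX | RADEON_RESET_CP))
 *		(the gfx/CP blocks need a reset)
 *
 * cik_gfx_is_lockup() and cik_asic_reset() below follow this pattern.
 */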
5330 
5331 /**
5332  * cik_gpu_soft_reset - soft reset GPU
5333  *
5334  * @rdev: radeon_device pointer
5335  * @reset_mask: mask of which blocks to reset
5336  *
5337  * Soft reset the blocks specified in @reset_mask.
5338  */
5339 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5340 {
5341 	struct evergreen_mc_save save;
5342 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5343 	u32 tmp;
5344 
5345 	if (reset_mask == 0)
5346 		return;
5347 
5348 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5349 
5350 	cik_print_gpu_status_regs(rdev);
5351 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5352 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5353 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5354 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5355 
5356 	/* disable CG/PG */
5357 	cik_fini_pg(rdev);
5358 	cik_fini_cg(rdev);
5359 
5360 	/* stop the rlc */
5361 	cik_rlc_stop(rdev);
5362 
5363 	/* Disable GFX parsing/prefetching */
5364 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5365 
5366 	/* Disable MEC parsing/prefetching */
5367 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5368 
5369 	if (reset_mask & RADEON_RESET_DMA) {
5370 		/* sdma0 */
5371 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5372 		tmp |= SDMA_HALT;
5373 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5374 	}
5375 	if (reset_mask & RADEON_RESET_DMA1) {
5376 		/* sdma1 */
5377 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5378 		tmp |= SDMA_HALT;
5379 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5380 	}
5381 
5382 	evergreen_mc_stop(rdev, &save);
5383 	if (evergreen_mc_wait_for_idle(rdev)) {
5384 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5385 	}
5386 
5387 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5388 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5389 
5390 	if (reset_mask & RADEON_RESET_CP) {
5391 		grbm_soft_reset |= SOFT_RESET_CP;
5392 
5393 		srbm_soft_reset |= SOFT_RESET_GRBM;
5394 	}
5395 
5396 	if (reset_mask & RADEON_RESET_DMA)
5397 		srbm_soft_reset |= SOFT_RESET_SDMA;
5398 
5399 	if (reset_mask & RADEON_RESET_DMA1)
5400 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5401 
5402 	if (reset_mask & RADEON_RESET_DISPLAY)
5403 		srbm_soft_reset |= SOFT_RESET_DC;
5404 
5405 	if (reset_mask & RADEON_RESET_RLC)
5406 		grbm_soft_reset |= SOFT_RESET_RLC;
5407 
5408 	if (reset_mask & RADEON_RESET_SEM)
5409 		srbm_soft_reset |= SOFT_RESET_SEM;
5410 
5411 	if (reset_mask & RADEON_RESET_IH)
5412 		srbm_soft_reset |= SOFT_RESET_IH;
5413 
5414 	if (reset_mask & RADEON_RESET_GRBM)
5415 		srbm_soft_reset |= SOFT_RESET_GRBM;
5416 
5417 	if (reset_mask & RADEON_RESET_VMC)
5418 		srbm_soft_reset |= SOFT_RESET_VMC;
5419 
5420 	if (!(rdev->flags & RADEON_IS_IGP)) {
5421 		if (reset_mask & RADEON_RESET_MC)
5422 			srbm_soft_reset |= SOFT_RESET_MC;
5423 	}
5424 
5425 	if (grbm_soft_reset) {
5426 		tmp = RREG32(GRBM_SOFT_RESET);
5427 		tmp |= grbm_soft_reset;
5428 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5429 		WREG32(GRBM_SOFT_RESET, tmp);
5430 		tmp = RREG32(GRBM_SOFT_RESET);
5431 
5432 		udelay(50);
5433 
5434 		tmp &= ~grbm_soft_reset;
5435 		WREG32(GRBM_SOFT_RESET, tmp);
5436 		tmp = RREG32(GRBM_SOFT_RESET);
5437 	}
5438 
5439 	if (srbm_soft_reset) {
5440 		tmp = RREG32(SRBM_SOFT_RESET);
5441 		tmp |= srbm_soft_reset;
5442 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5443 		WREG32(SRBM_SOFT_RESET, tmp);
5444 		tmp = RREG32(SRBM_SOFT_RESET);
5445 
5446 		udelay(50);
5447 
5448 		tmp &= ~srbm_soft_reset;
5449 		WREG32(SRBM_SOFT_RESET, tmp);
5450 		tmp = RREG32(SRBM_SOFT_RESET);
5451 	}
5452 
5453 	/* Wait a little for things to settle down */
5454 	udelay(50);
5455 
5456 	evergreen_mc_resume(rdev, &save);
5457 	udelay(50);
5458 
5459 	cik_print_gpu_status_regs(rdev);
5460 }
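
/*
 * Note on the reset pulse above: each block is reset by OR-ing its
 * SOFT_RESET bit into the register, reading it back to post the write,
 * holding for ~50 us, then clearing the bit and reading back again:
 *
 *	tmp = RREG32(GRBM_SOFT_RESET);
 *	WREG32(GRBM_SOFT_RESET, tmp | SOFT_RESET_CP);
 *	(void)RREG32(GRBM_SOFT_RESET);	(posts the write)
 *	udelay(50);
 *	WREG32(GRBM_SOFT_RESET, tmp & ~SOFT_RESET_CP);
 *	(void)RREG32(GRBM_SOFT_RESET);
 *
 * Condensed sketch of the sequence implemented above.
 */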
5461 
5462 struct kv_reset_save_regs {
5463 	u32 gmcon_reng_execute;
5464 	u32 gmcon_misc;
5465 	u32 gmcon_misc3;
5466 };
5467 
5468 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5469 				   struct kv_reset_save_regs *save)
5470 {
5471 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5472 	save->gmcon_misc = RREG32(GMCON_MISC);
5473 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5474 
5475 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5476 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5477 						STCTRL_STUTTER_EN));
5478 }
5479 
5480 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5481 				      struct kv_reset_save_regs *save)
5482 {
5483 	int i;
5484 
5485 	WREG32(GMCON_PGFSM_WRITE, 0);
5486 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5487 
5488 	for (i = 0; i < 5; i++)
5489 		WREG32(GMCON_PGFSM_WRITE, 0);
5490 
5491 	WREG32(GMCON_PGFSM_WRITE, 0);
5492 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5493 
5494 	for (i = 0; i < 5; i++)
5495 		WREG32(GMCON_PGFSM_WRITE, 0);
5496 
5497 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5498 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5499 
5500 	for (i = 0; i < 5; i++)
5501 		WREG32(GMCON_PGFSM_WRITE, 0);
5502 
5503 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5504 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5505 
5506 	for (i = 0; i < 5; i++)
5507 		WREG32(GMCON_PGFSM_WRITE, 0);
5508 
5509 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5510 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5511 
5512 	for (i = 0; i < 5; i++)
5513 		WREG32(GMCON_PGFSM_WRITE, 0);
5514 
5515 	WREG32(GMCON_PGFSM_WRITE, 0);
5516 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5517 
5518 	for (i = 0; i < 5; i++)
5519 		WREG32(GMCON_PGFSM_WRITE, 0);
5520 
5521 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5522 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5523 
5524 	for (i = 0; i < 5; i++)
5525 		WREG32(GMCON_PGFSM_WRITE, 0);
5526 
5527 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5528 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5529 
5530 	for (i = 0; i < 5; i++)
5531 		WREG32(GMCON_PGFSM_WRITE, 0);
5532 
5533 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5534 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5535 
5536 	for (i = 0; i < 5; i++)
5537 		WREG32(GMCON_PGFSM_WRITE, 0);
5538 
5539 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5540 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5541 
5542 	for (i = 0; i < 5; i++)
5543 		WREG32(GMCON_PGFSM_WRITE, 0);
5544 
5545 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5546 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5547 
5548 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5549 	WREG32(GMCON_MISC, save->gmcon_misc);
5550 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5551 }
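
/*
 * Pattern note (hedged): the PGFSM restore above repeats one idiom:
 * write a data word to GMCON_PGFSM_WRITE, select the target power-
 * gating FSM bank through GMCON_PGFSM_CONFIG (the 0x?00010ff values),
 * then issue five dummy writes, presumably to let the state machine
 * latch.  The magic constants are opaque hardware values; only the
 * final three writes restore the registers saved in
 * kv_save_regs_for_reset().
 */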
5552 
5553 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5554 {
5555 	struct evergreen_mc_save save;
5556 	struct kv_reset_save_regs kv_save = { 0 };
5557 	u32 tmp, i;
5558 
5559 	dev_info(rdev->dev, "GPU pci config reset\n");
5560 
5561 	/* disable dpm? */
5562 
5563 	/* disable cg/pg */
5564 	cik_fini_pg(rdev);
5565 	cik_fini_cg(rdev);
5566 
5567 	/* Disable GFX parsing/prefetching */
5568 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5569 
5570 	/* Disable MEC parsing/prefetching */
5571 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5572 
5573 	/* sdma0 */
5574 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5575 	tmp |= SDMA_HALT;
5576 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5577 	/* sdma1 */
5578 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5579 	tmp |= SDMA_HALT;
5580 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5581 	/* XXX other engines? */
5582 
5583 	/* halt the rlc, disable cp internal ints */
5584 	cik_rlc_stop(rdev);
5585 
5586 	udelay(50);
5587 
5588 	/* disable mem access */
5589 	evergreen_mc_stop(rdev, &save);
5590 	if (evergreen_mc_wait_for_idle(rdev)) {
5591 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5592 	}
5593 
5594 	if (rdev->flags & RADEON_IS_IGP)
5595 		kv_save_regs_for_reset(rdev, &kv_save);
5596 
5597 	/* disable BM */
5598 	pci_clear_master(rdev->pdev);
5599 	/* reset */
5600 	radeon_pci_config_reset(rdev);
5601 
5602 	udelay(100);
5603 
5604 	/* wait for asic to come out of reset */
5605 	for (i = 0; i < rdev->usec_timeout; i++) {
5606 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5607 			break;
5608 		udelay(1);
5609 	}
5610 
5611 	/* does asic init need to be run first??? */
5612 	if (rdev->flags & RADEON_IS_IGP)
5613 		kv_restore_regs_for_reset(rdev, &kv_save);
5614 }
5615 
5616 /**
5617  * cik_asic_reset - soft reset GPU
5618  *
5619  * @rdev: radeon_device pointer
5620  *
5621  * Look up which blocks are hung and attempt
5622  * to reset them.
5623  * Returns 0 for success.
5624  */
5625 int cik_asic_reset(struct radeon_device *rdev)
5626 {
5627 	u32 reset_mask;
5628 
5629 	reset_mask = cik_gpu_check_soft_reset(rdev);
5630 
5631 	if (reset_mask)
5632 		r600_set_bios_scratch_engine_hung(rdev, true);
5633 
5634 	/* try soft reset */
5635 	cik_gpu_soft_reset(rdev, reset_mask);
5636 
5637 	reset_mask = cik_gpu_check_soft_reset(rdev);
5638 
5639 	/* try pci config reset */
5640 	if (reset_mask && radeon_hard_reset)
5641 		cik_gpu_pci_config_reset(rdev);
5642 
5643 	reset_mask = cik_gpu_check_soft_reset(rdev);
5644 
5645 	if (!reset_mask)
5646 		r600_set_bios_scratch_engine_hung(rdev, false);
5647 
5648 	return 0;
5649 }
5650 
5651 /**
5652  * cik_gfx_is_lockup - check if the 3D engine is locked up
5653  *
5654  * @rdev: radeon_device pointer
5655  * @ring: radeon_ring structure holding ring information
5656  *
5657  * Check if the 3D engine is locked up (CIK).
5658  * Returns true if the engine is locked, false if not.
5659  */
5660 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5661 {
5662 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5663 
5664 	if (!(reset_mask & (RADEON_RESET_GFX |
5665 			    RADEON_RESET_COMPUTE |
5666 			    RADEON_RESET_CP))) {
5667 		radeon_ring_lockup_update(rdev, ring);
5668 		return false;
5669 	}
5670 	return radeon_ring_test_lockup(rdev, ring);
5671 }
5672 
5673 /* MC */
5674 /**
5675  * cik_mc_program - program the GPU memory controller
5676  *
5677  * @rdev: radeon_device pointer
5678  *
5679  * Set the location of vram, gart, and AGP in the GPU's
5680  * physical address space (CIK).
5681  */
5682 static void cik_mc_program(struct radeon_device *rdev)
5683 {
5684 	struct evergreen_mc_save save;
5685 	u32 tmp;
5686 	int i, j;
5687 
5688 	/* Initialize HDP */
5689 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5690 		WREG32((0x2c14 + j), 0x00000000);
5691 		WREG32((0x2c18 + j), 0x00000000);
5692 		WREG32((0x2c1c + j), 0x00000000);
5693 		WREG32((0x2c20 + j), 0x00000000);
5694 		WREG32((0x2c24 + j), 0x00000000);
5695 	}
5696 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5697 
5698 	evergreen_mc_stop(rdev, &save);
5699 	if (radeon_mc_wait_for_idle(rdev)) {
5700 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5701 	}
5702 	/* Lock out access through the VGA aperture */
5703 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5704 	/* Update configuration */
5705 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5706 	       rdev->mc.vram_start >> 12);
5707 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5708 	       rdev->mc.vram_end >> 12);
5709 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5710 	       rdev->vram_scratch.gpu_addr >> 12);
5711 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5712 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5713 	WREG32(MC_VM_FB_LOCATION, tmp);
5714 	/* XXX double check these! */
5715 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5716 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5717 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5718 	WREG32(MC_VM_AGP_BASE, 0);
5719 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5720 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5721 	if (radeon_mc_wait_for_idle(rdev)) {
5722 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5723 	}
5724 	evergreen_mc_resume(rdev, &save);
5725 	/* we need to own VRAM, so turn off the VGA renderer here
5726 	 * to stop it overwriting our objects */
5727 	rv515_vga_render_disable(rdev);
5728 }
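
/*
 * Worked example (hypothetical 4 GB VRAM at physical offset 0):
 * vram_start = 0x0 and vram_end = 0xFFFFFFFF give
 * MC_VM_FB_LOCATION = ((0xFFFFFFFF >> 24) & 0xFFFF) << 16, i.e.
 * 0x00FF0000: the framebuffer base sits in the low 16 bits and the
 * top in the high 16 bits, both in 16 MB units.
 */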
5729 
5730 /**
5731  * cik_mc_init - initialize the memory controller driver params
5732  *
5733  * @rdev: radeon_device pointer
5734  *
5735  * Look up the amount of vram, vram width, and decide how to place
5736  * vram and gart within the GPU's physical address space (CIK).
5737  * Returns 0 for success.
5738  */
5739 static int cik_mc_init(struct radeon_device *rdev)
5740 {
5741 	u32 tmp;
5742 	int chansize, numchan;
5743 
5744 	/* Get VRAM information */
5745 	rdev->mc.vram_is_ddr = true;
5746 	tmp = RREG32(MC_ARB_RAMCFG);
5747 	if (tmp & CHANSIZE_MASK) {
5748 		chansize = 64;
5749 	} else {
5750 		chansize = 32;
5751 	}
5752 	tmp = RREG32(MC_SHARED_CHMAP);
5753 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5754 	case 0:
5755 	default:
5756 		numchan = 1;
5757 		break;
5758 	case 1:
5759 		numchan = 2;
5760 		break;
5761 	case 2:
5762 		numchan = 4;
5763 		break;
5764 	case 3:
5765 		numchan = 8;
5766 		break;
5767 	case 4:
5768 		numchan = 3;
5769 		break;
5770 	case 5:
5771 		numchan = 6;
5772 		break;
5773 	case 6:
5774 		numchan = 10;
5775 		break;
5776 	case 7:
5777 		numchan = 12;
5778 		break;
5779 	case 8:
5780 		numchan = 16;
5781 		break;
5782 	}
5783 	rdev->mc.vram_width = numchan * chansize;
5784 	/* Could aper size report 0? */
5785 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5786 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5787 	/* size in MB on CIK */
5788 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5789 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5790 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5791 	si_vram_gtt_location(rdev, &rdev->mc);
5792 	radeon_update_bandwidth_info(rdev);
5793 
5794 	return 0;
5795 }
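
/*
 * Worked example: MC_SHARED_CHMAP reporting 2 selects 4 channels, and
 * with a 64-bit channel size vram_width = 4 * 64 = 256 bits, the width
 * a Bonaire-class board would typically report.
 */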
5796 
5797 /*
5798  * GART
5799  * VMID 0 is the physical GPU addresses as used by the kernel.
5800  * VMIDs 1-15 are used for userspace clients and are handled
5801  * by the radeon vm/hsa code.
5802  */
5803 /**
5804  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5805  *
5806  * @rdev: radeon_device pointer
5807  *
5808  * Flush the TLB for the VMID 0 page table (CIK).
5809  */
5810 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5811 {
5812 	/* flush hdp cache */
5813 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5814 
5815 	/* bits 0-15 are the VM contexts 0-15 */
5816 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5817 }
5818 
5819 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5820 {
5821 	int i;
5822 	uint32_t sh_mem_bases, sh_mem_config;
5823 
5824 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5825 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5826 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5827 
5828 	mutex_lock(&rdev->srbm_mutex);
5829 	for (i = 8; i < 16; i++) {
5830 		cik_srbm_select(rdev, 0, 0, 0, i);
5831 		/* CP and shaders */
5832 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5833 		WREG32(SH_MEM_APE1_BASE, 1);
5834 		WREG32(SH_MEM_APE1_LIMIT, 0);
5835 		WREG32(SH_MEM_BASES, sh_mem_bases);
5836 	}
5837 	cik_srbm_select(rdev, 0, 0, 0, 0);
5838 	mutex_unlock(&rdev->srbm_mutex);
5839 }
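
/*
 * Note (hedged): 0x6000 | 0x6000 << 16 programs the private and shared
 * aperture bases in SH_MEM_BASES to the same unit for compute VMIDs
 * 8-15, while APE1 is effectively disabled by giving it a base (1)
 * above its limit (0).  These VMIDs are the ones handed to amdkfd for
 * HSA clients, per the VMID split described in cik_vm_init().
 */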
5840 
5841 /**
5842  * cik_pcie_gart_enable - gart enable
5843  *
5844  * @rdev: radeon_device pointer
5845  *
5846  * This sets up the TLBs, programs the page tables for VMID0,
5847  * sets up the hw for VMIDs 1-15 which are allocated on
5848  * demand, and sets up the global locations for the LDS, GDS,
5849  * and GPUVM for FSA64 clients (CIK).
5850  * Returns 0 for success, errors for failure.
5851  */
5852 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5853 {
5854 	int r, i;
5855 
5856 	if (rdev->gart.robj == NULL) {
5857 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5858 		return -EINVAL;
5859 	}
5860 	r = radeon_gart_table_vram_pin(rdev);
5861 	if (r)
5862 		return r;
5863 	/* Setup TLB control */
5864 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5865 	       (0xA << 7) |
5866 	       ENABLE_L1_TLB |
5867 	       ENABLE_L1_FRAGMENT_PROCESSING |
5868 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5869 	       ENABLE_ADVANCED_DRIVER_MODEL |
5870 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5871 	/* Setup L2 cache */
5872 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5873 	       ENABLE_L2_FRAGMENT_PROCESSING |
5874 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5875 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5876 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5877 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5878 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5879 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5880 	       BANK_SELECT(4) |
5881 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5882 	/* setup context0 */
5883 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5884 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5885 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5886 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5887 			(u32)(rdev->dummy_page.addr >> 12));
5888 	WREG32(VM_CONTEXT0_CNTL2, 0);
5889 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5890 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5891 
5892 	WREG32(0x15D4, 0);
5893 	WREG32(0x15D8, 0);
5894 	WREG32(0x15DC, 0);
5895 
5896 	/* restore contexts 1-15 */
5897 	/* set vm size, must be a multiple of 4 */
5898 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5899 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5900 	for (i = 1; i < 16; i++) {
5901 		if (i < 8)
5902 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5903 			       rdev->vm_manager.saved_table_addr[i]);
5904 		else
5905 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5906 			       rdev->vm_manager.saved_table_addr[i]);
5907 	}
5908 
5909 	/* enable context1-15 */
5910 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5911 	       (u32)(rdev->dummy_page.addr >> 12));
5912 	WREG32(VM_CONTEXT1_CNTL2, 4);
5913 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5914 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5915 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5916 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5917 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5918 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5919 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5920 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5921 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5922 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5923 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5924 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5925 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5926 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5927 
5928 	if (rdev->family == CHIP_KAVERI) {
5929 		u32 tmp = RREG32(CHUB_CONTROL);
5930 		tmp &= ~BYPASS_VM;
5931 		WREG32(CHUB_CONTROL, tmp);
5932 	}
5933 
5934 	/* XXX SH_MEM regs */
5935 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5936 	mutex_lock(&rdev->srbm_mutex);
5937 	for (i = 0; i < 16; i++) {
5938 		cik_srbm_select(rdev, 0, 0, 0, i);
5939 		/* CP and shaders */
5940 		WREG32(SH_MEM_CONFIG, 0);
5941 		WREG32(SH_MEM_APE1_BASE, 1);
5942 		WREG32(SH_MEM_APE1_LIMIT, 0);
5943 		WREG32(SH_MEM_BASES, 0);
5944 		/* SDMA GFX */
5945 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5946 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5947 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5948 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5949 		/* XXX SDMA RLC - todo */
5950 	}
5951 	cik_srbm_select(rdev, 0, 0, 0, 0);
5952 	mutex_unlock(&rdev->srbm_mutex);
5953 
5954 	cik_pcie_init_compute_vmid(rdev);
5955 
5956 	cik_pcie_gart_tlb_flush(rdev);
5957 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5958 		 (unsigned)(rdev->mc.gtt_size >> 20),
5959 		 (unsigned long long)rdev->gart.table_addr);
5960 	rdev->gart.ready = true;
5961 	return 0;
5962 }
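
/*
 * Sizing note (hedged): PAGE_TABLE_BLOCK_SIZE is programmed as
 * radeon_vm_block_size - 9, i.e. relative to the 9 bits of address a
 * single 4 KB page of 64-bit PTEs already resolves.  With a block size
 * of 9 the field is 0; each increment doubles the pages mapped per
 * page-table block.
 */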
5963 
5964 /**
5965  * cik_pcie_gart_disable - gart disable
5966  *
5967  * @rdev: radeon_device pointer
5968  *
5969  * This disables all VM page tables (CIK).
5970  */
5971 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5972 {
5973 	unsigned i;
5974 
5975 	for (i = 1; i < 16; ++i) {
5976 		uint32_t reg;
5977 		if (i < 8)
5978 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5979 		else
5980 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5981 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5982 	}
5983 
5984 	/* Disable all tables */
5985 	WREG32(VM_CONTEXT0_CNTL, 0);
5986 	WREG32(VM_CONTEXT1_CNTL, 0);
5987 	/* Setup TLB control */
5988 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5989 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5990 	/* Setup L2 cache */
5991 	WREG32(VM_L2_CNTL,
5992 	       ENABLE_L2_FRAGMENT_PROCESSING |
5993 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5994 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5995 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5996 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5997 	WREG32(VM_L2_CNTL2, 0);
5998 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5999 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
6000 	radeon_gart_table_vram_unpin(rdev);
6001 }
6002 
6003 /**
6004  * cik_pcie_gart_fini - vm fini callback
6005  *
6006  * @rdev: radeon_device pointer
6007  *
6008  * Tears down the driver GART/VM setup (CIK).
6009  */
6010 static void cik_pcie_gart_fini(struct radeon_device *rdev)
6011 {
6012 	cik_pcie_gart_disable(rdev);
6013 	radeon_gart_table_vram_free(rdev);
6014 	radeon_gart_fini(rdev);
6015 }
6016 
6017 /* vm parser */
6018 /**
6019  * cik_ib_parse - vm ib_parse callback
6020  *
6021  * @rdev: radeon_device pointer
6022  * @ib: indirect buffer pointer
6023  *
6024  * CIK uses hw IB checking so this is a nop (CIK).
6025  */
6026 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
6027 {
6028 	return 0;
6029 }
6030 
6031 /*
6032  * vm
6033  * VMID 0 is the physical GPU addresses as used by the kernel.
6034  * VMIDs 1-15 are used for userspace clients and are handled
6035  * by the radeon vm/hsa code.
6036  */
6037 /**
6038  * cik_vm_init - cik vm init callback
6039  *
6040  * @rdev: radeon_device pointer
6041  *
6042  * Inits cik specific vm parameters (number of VMs, base of vram for
6043  * VMIDs 1-15) (CIK).
6044  * Returns 0 for success.
6045  */
6046 int cik_vm_init(struct radeon_device *rdev)
6047 {
6048 	/*
6049 	 * number of VMs
6050 	 * VMID 0 is reserved for System
6051 	 * radeon graphics/compute will use VMIDs 1-7
6052 	 * amdkfd will use VMIDs 8-15
6053 	 */
6054 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6055 	/* base offset of vram pages */
6056 	if (rdev->flags & RADEON_IS_IGP) {
6057 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
6058 		tmp <<= 22;
6059 		rdev->vm_manager.vram_base_offset = tmp;
6060 	} else
6061 		rdev->vm_manager.vram_base_offset = 0;
6062 
6063 	return 0;
6064 }
6065 
6066 /**
6067  * cik_vm_fini - cik vm fini callback
6068  *
6069  * @rdev: radeon_device pointer
6070  *
6071  * Tear down any asic specific VM setup (CIK).
6072  */
6073 void cik_vm_fini(struct radeon_device *rdev)
6074 {
6075 }
6076 
6077 /**
6078  * cik_vm_decode_fault - print human readable fault info
6079  *
6080  * @rdev: radeon_device pointer
6081  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6082  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
6083  *
6084  * Print human readable fault information (CIK).
6085  */
6086 static void cik_vm_decode_fault(struct radeon_device *rdev,
6087 				u32 status, u32 addr, u32 mc_client)
6088 {
6089 	u32 mc_id;
6090 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6091 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6092 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6093 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6094 
6095 	if (rdev->family == CHIP_HAWAII)
6096 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6097 	else
6098 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6099 
6100 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6101 	       protections, vmid, addr,
6102 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6103 	       block, mc_client, mc_id);
6104 }
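
/*
 * Decode example (illustrative values): an mc_client of 0x43504720
 * unpacks byte-by-byte, MSB first, to the string "CPG ", so a fault
 * report would read e.g.:
 *
 *	VM fault (0x04, vmid 3) at page 4096, write from 'CPG '
 *	(0x43504720) (16)
 *
 * with the trailing number being the family-specific client id field.
 */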
6105 
6106 /**
6107  * cik_vm_flush - cik vm flush using the CP
6108  *
6109  * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID of the VM to flush
 * @pd_addr: address of the page directory
6110  *
6111  * Update the page table base and flush the VM TLB
6112  * using the CP (CIK).
6113  */
6114 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6115 		  unsigned vm_id, uint64_t pd_addr)
6116 {
6117 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6118 
6119 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6120 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6121 				 WRITE_DATA_DST_SEL(0)));
6122 	if (vm_id < 8) {
6123 		radeon_ring_write(ring,
6124 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6125 	} else {
6126 		radeon_ring_write(ring,
6127 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6128 	}
6129 	radeon_ring_write(ring, 0);
6130 	radeon_ring_write(ring, pd_addr >> 12);
6131 
6132 	/* update SH_MEM_* regs */
6133 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6134 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6135 				 WRITE_DATA_DST_SEL(0)));
6136 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6137 	radeon_ring_write(ring, 0);
6138 	radeon_ring_write(ring, VMID(vm_id));
6139 
6140 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6141 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6142 				 WRITE_DATA_DST_SEL(0)));
6143 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6144 	radeon_ring_write(ring, 0);
6145 
6146 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6147 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6148 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6149 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6150 
6151 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6152 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6153 				 WRITE_DATA_DST_SEL(0)));
6154 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6155 	radeon_ring_write(ring, 0);
6156 	radeon_ring_write(ring, VMID(0));
6157 
6158 	/* HDP flush */
6159 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6160 
6161 	/* bits 0-15 are the VM contexts 0-15 */
6162 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6163 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6164 				 WRITE_DATA_DST_SEL(0)));
6165 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6166 	radeon_ring_write(ring, 0);
6167 	radeon_ring_write(ring, 1 << vm_id);
6168 
6169 	/* wait for the invalidate to complete */
6170 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6171 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6172 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6173 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6174 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6175 	radeon_ring_write(ring, 0);
6176 	radeon_ring_write(ring, 0); /* ref */
6177 	radeon_ring_write(ring, 0); /* mask */
6178 	radeon_ring_write(ring, 0x20); /* poll interval */
6179 
6180 	/* compute doesn't have PFP */
6181 	if (usepfp) {
6182 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6183 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6184 		radeon_ring_write(ring, 0x0);
6185 	}
6186 }
6187 
6188 /*
6189  * RLC
6190  * The RLC is a multi-purpose microengine that handles a
6191  * variety of functions, the most important of which is
6192  * the interrupt controller.
6193  */
6194 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6195 					  bool enable)
6196 {
6197 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6198 
6199 	if (enable)
6200 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6201 	else
6202 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6203 	WREG32(CP_INT_CNTL_RING0, tmp);
6204 }
6205 
6206 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6207 {
6208 	u32 tmp;
6209 
6210 	tmp = RREG32(RLC_LB_CNTL);
6211 	if (enable)
6212 		tmp |= LOAD_BALANCE_ENABLE;
6213 	else
6214 		tmp &= ~LOAD_BALANCE_ENABLE;
6215 	WREG32(RLC_LB_CNTL, tmp);
6216 }
6217 
6218 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6219 {
6220 	u32 i, j, k;
6221 	u32 mask;
6222 
6223 	mutex_lock(&rdev->grbm_idx_mutex);
6224 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6225 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6226 			cik_select_se_sh(rdev, i, j);
6227 			for (k = 0; k < rdev->usec_timeout; k++) {
6228 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6229 					break;
6230 				udelay(1);
6231 			}
6232 		}
6233 	}
6234 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6235 	mutex_unlock(&rdev->grbm_idx_mutex);
6236 
6237 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6238 	for (k = 0; k < rdev->usec_timeout; k++) {
6239 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6240 			break;
6241 		udelay(1);
6242 	}
6243 }
6244 
6245 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6246 {
6247 	u32 tmp;
6248 
6249 	tmp = RREG32(RLC_CNTL);
6250 	if (tmp != rlc)
6251 		WREG32(RLC_CNTL, rlc);
6252 }
6253 
6254 static u32 cik_halt_rlc(struct radeon_device *rdev)
6255 {
6256 	u32 data, orig;
6257 
6258 	orig = data = RREG32(RLC_CNTL);
6259 
6260 	if (data & RLC_ENABLE) {
6261 		u32 i;
6262 
6263 		data &= ~RLC_ENABLE;
6264 		WREG32(RLC_CNTL, data);
6265 
6266 		for (i = 0; i < rdev->usec_timeout; i++) {
6267 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6268 				break;
6269 			udelay(1);
6270 		}
6271 
6272 		cik_wait_for_rlc_serdes(rdev);
6273 	}
6274 
6275 	return orig;
6276 }
6277 
6278 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6279 {
6280 	u32 tmp, i, mask;
6281 
6282 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6283 	WREG32(RLC_GPR_REG2, tmp);
6284 
6285 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6286 	for (i = 0; i < rdev->usec_timeout; i++) {
6287 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6288 			break;
6289 		udelay(1);
6290 	}
6291 
6292 	for (i = 0; i < rdev->usec_timeout; i++) {
6293 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6294 			break;
6295 		udelay(1);
6296 	}
6297 }
6298 
6299 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6300 {
6301 	u32 tmp;
6302 
6303 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6304 	WREG32(RLC_GPR_REG2, tmp);
6305 }
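
/*
 * Usage sketch: the safe-mode pair brackets register sequences that
 * must not race the RLC, e.g.:
 *
 *	cik_enter_rlc_safe_mode(rdev);
 *	(reprogram clock-/power-gating state)
 *	cik_exit_rlc_safe_mode(rdev);
 *
 * Enter polls until the RLC acknowledges the request; exit just posts
 * the message and returns.
 */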
6306 
6307 /**
6308  * cik_rlc_stop - stop the RLC ME
6309  *
6310  * @rdev: radeon_device pointer
6311  *
6312  * Halt the RLC ME (MicroEngine) (CIK).
6313  */
6314 static void cik_rlc_stop(struct radeon_device *rdev)
6315 {
6316 	WREG32(RLC_CNTL, 0);
6317 
6318 	cik_enable_gui_idle_interrupt(rdev, false);
6319 
6320 	cik_wait_for_rlc_serdes(rdev);
6321 }
6322 
6323 /**
6324  * cik_rlc_start - start the RLC ME
6325  *
6326  * @rdev: radeon_device pointer
6327  *
6328  * Unhalt the RLC ME (MicroEngine) (CIK).
6329  */
6330 static void cik_rlc_start(struct radeon_device *rdev)
6331 {
6332 	WREG32(RLC_CNTL, RLC_ENABLE);
6333 
6334 	cik_enable_gui_idle_interrupt(rdev, true);
6335 
6336 	udelay(50);
6337 }
6338 
6339 /**
6340  * cik_rlc_resume - setup the RLC hw
6341  *
6342  * @rdev: radeon_device pointer
6343  *
6344  * Initialize the RLC registers, load the ucode,
6345  * and start the RLC (CIK).
6346  * Returns 0 for success, -EINVAL if the ucode is not available.
6347  */
6348 static int cik_rlc_resume(struct radeon_device *rdev)
6349 {
6350 	u32 i, size, tmp;
6351 
6352 	if (!rdev->rlc_fw)
6353 		return -EINVAL;
6354 
6355 	cik_rlc_stop(rdev);
6356 
6357 	/* disable CG */
6358 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6359 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6360 
6361 	si_rlc_reset(rdev);
6362 
6363 	cik_init_pg(rdev);
6364 
6365 	cik_init_cg(rdev);
6366 
6367 	WREG32(RLC_LB_CNTR_INIT, 0);
6368 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6369 
6370 	mutex_lock(&rdev->grbm_idx_mutex);
6371 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6372 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6373 	WREG32(RLC_LB_PARAMS, 0x00600408);
6374 	WREG32(RLC_LB_CNTL, 0x80000004);
6375 	mutex_unlock(&rdev->grbm_idx_mutex);
6376 
6377 	WREG32(RLC_MC_CNTL, 0);
6378 	WREG32(RLC_UCODE_CNTL, 0);
6379 
6380 	if (rdev->new_fw) {
6381 		const struct rlc_firmware_header_v1_0 *hdr =
6382 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6383 		const __le32 *fw_data = (const __le32 *)
6384 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6385 
6386 		radeon_ucode_print_rlc_hdr(&hdr->header);
6387 
6388 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6389 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6390 		for (i = 0; i < size; i++)
6391 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6392 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6393 	} else {
6394 		const __be32 *fw_data;
6395 
6396 		switch (rdev->family) {
6397 		case CHIP_BONAIRE:
6398 		case CHIP_HAWAII:
6399 		default:
6400 			size = BONAIRE_RLC_UCODE_SIZE;
6401 			break;
6402 		case CHIP_KAVERI:
6403 			size = KV_RLC_UCODE_SIZE;
6404 			break;
6405 		case CHIP_KABINI:
6406 			size = KB_RLC_UCODE_SIZE;
6407 			break;
6408 		case CHIP_MULLINS:
6409 			size = ML_RLC_UCODE_SIZE;
6410 			break;
6411 		}
6412 
6413 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6414 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6415 		for (i = 0; i < size; i++)
6416 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6417 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6418 	}
6419 
6420 	/* XXX - find out what chips support lbpw */
6421 	cik_enable_lbpw(rdev, false);
6422 
6423 	if (rdev->family == CHIP_BONAIRE)
6424 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6425 
6426 	cik_rlc_start(rdev);
6427 
6428 	return 0;
6429 }
6430 
6431 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6432 {
6433 	u32 data, orig, tmp, tmp2;
6434 
6435 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6436 
6437 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6438 		cik_enable_gui_idle_interrupt(rdev, true);
6439 
6440 		tmp = cik_halt_rlc(rdev);
6441 
6442 		mutex_lock(&rdev->grbm_idx_mutex);
6443 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6444 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6445 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6446 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6447 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6448 		mutex_unlock(&rdev->grbm_idx_mutex);
6449 
6450 		cik_update_rlc(rdev, tmp);
6451 
6452 		data |= CGCG_EN | CGLS_EN;
6453 	} else {
6454 		cik_enable_gui_idle_interrupt(rdev, false);
6455 
6456 		RREG32(CB_CGTT_SCLK_CTRL);
6457 		RREG32(CB_CGTT_SCLK_CTRL);
6458 		RREG32(CB_CGTT_SCLK_CTRL);
6459 		RREG32(CB_CGTT_SCLK_CTRL);
6460 
6461 		data &= ~(CGCG_EN | CGLS_EN);
6462 	}
6463 
6464 	if (orig != data)
6465 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6466 
6467 }
6468 
6469 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6470 {
6471 	u32 data, orig, tmp = 0;
6472 
6473 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6474 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6475 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6476 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6477 				data |= CP_MEM_LS_EN;
6478 				if (orig != data)
6479 					WREG32(CP_MEM_SLP_CNTL, data);
6480 			}
6481 		}
6482 
6483 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6484 		data |= 0x00000001;
6485 		data &= 0xfffffffd;
6486 		if (orig != data)
6487 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6488 
6489 		tmp = cik_halt_rlc(rdev);
6490 
6491 		mutex_lock(&rdev->grbm_idx_mutex);
6492 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6493 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6494 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6495 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6496 		WREG32(RLC_SERDES_WR_CTRL, data);
6497 		mutex_unlock(&rdev->grbm_idx_mutex);
6498 
6499 		cik_update_rlc(rdev, tmp);
6500 
6501 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6502 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6503 			data &= ~SM_MODE_MASK;
6504 			data |= SM_MODE(0x2);
6505 			data |= SM_MODE_ENABLE;
6506 			data &= ~CGTS_OVERRIDE;
6507 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6508 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6509 				data &= ~CGTS_LS_OVERRIDE;
6510 			data &= ~ON_MONITOR_ADD_MASK;
6511 			data |= ON_MONITOR_ADD_EN;
6512 			data |= ON_MONITOR_ADD(0x96);
6513 			if (orig != data)
6514 				WREG32(CGTS_SM_CTRL_REG, data);
6515 		}
6516 	} else {
6517 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6518 		data |= 0x00000003;
6519 		if (orig != data)
6520 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6521 
6522 		data = RREG32(RLC_MEM_SLP_CNTL);
6523 		if (data & RLC_MEM_LS_EN) {
6524 			data &= ~RLC_MEM_LS_EN;
6525 			WREG32(RLC_MEM_SLP_CNTL, data);
6526 		}
6527 
6528 		data = RREG32(CP_MEM_SLP_CNTL);
6529 		if (data & CP_MEM_LS_EN) {
6530 			data &= ~CP_MEM_LS_EN;
6531 			WREG32(CP_MEM_SLP_CNTL, data);
6532 		}
6533 
6534 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6535 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6536 		if (orig != data)
6537 			WREG32(CGTS_SM_CTRL_REG, data);
6538 
6539 		tmp = cik_halt_rlc(rdev);
6540 
6541 		mutex_lock(&rdev->grbm_idx_mutex);
6542 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6543 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6544 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6545 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6546 		WREG32(RLC_SERDES_WR_CTRL, data);
6547 		mutex_unlock(&rdev->grbm_idx_mutex);
6548 
6549 		cik_update_rlc(rdev, tmp);
6550 	}
6551 }
6552 
6553 static const u32 mc_cg_registers[] =
6554 {
6555 	MC_HUB_MISC_HUB_CG,
6556 	MC_HUB_MISC_SIP_CG,
6557 	MC_HUB_MISC_VM_CG,
6558 	MC_XPB_CLK_GAT,
6559 	ATC_MISC_CG,
6560 	MC_CITF_MISC_WR_CG,
6561 	MC_CITF_MISC_RD_CG,
6562 	MC_CITF_MISC_VM_CG,
6563 	VM_L2_CG,
6564 };
6565 
6566 static void cik_enable_mc_ls(struct radeon_device *rdev,
6567 			     bool enable)
6568 {
6569 	int i;
6570 	u32 orig, data;
6571 
6572 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6573 		orig = data = RREG32(mc_cg_registers[i]);
6574 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6575 			data |= MC_LS_ENABLE;
6576 		else
6577 			data &= ~MC_LS_ENABLE;
6578 		if (data != orig)
6579 			WREG32(mc_cg_registers[i], data);
6580 	}
6581 }
6582 
6583 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6584 			       bool enable)
6585 {
6586 	int i;
6587 	u32 orig, data;
6588 
6589 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6590 		orig = data = RREG32(mc_cg_registers[i]);
6591 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6592 			data |= MC_CG_ENABLE;
6593 		else
6594 			data &= ~MC_CG_ENABLE;
6595 		if (data != orig)
6596 			WREG32(mc_cg_registers[i], data);
6597 	}
6598 }
6599 
6600 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6601 				 bool enable)
6602 {
6603 	u32 orig, data;
6604 
6605 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6606 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6607 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6608 	} else {
6609 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6610 		data |= 0xff000000;
6611 		if (data != orig)
6612 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6613 
6614 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6615 		data |= 0xff000000;
6616 		if (data != orig)
6617 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6618 	}
6619 }
6620 
6621 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6622 				 bool enable)
6623 {
6624 	u32 orig, data;
6625 
6626 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6627 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6628 		data |= 0x100;
6629 		if (orig != data)
6630 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6631 
6632 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6633 		data |= 0x100;
6634 		if (orig != data)
6635 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6636 	} else {
6637 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6638 		data &= ~0x100;
6639 		if (orig != data)
6640 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6641 
6642 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6643 		data &= ~0x100;
6644 		if (orig != data)
6645 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6646 	}
6647 }
6648 
6649 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6650 				bool enable)
6651 {
6652 	u32 orig, data;
6653 
6654 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6655 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6656 		data = 0xfff;	/* overwrites the value just read; presumably enables all mem gates */
6657 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6658 
6659 		orig = data = RREG32(UVD_CGC_CTRL);
6660 		data |= DCM;
6661 		if (orig != data)
6662 			WREG32(UVD_CGC_CTRL, data);
6663 	} else {
6664 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6665 		data &= ~0xfff;
6666 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6667 
6668 		orig = data = RREG32(UVD_CGC_CTRL);
6669 		data &= ~DCM;
6670 		if (orig != data)
6671 			WREG32(UVD_CGC_CTRL, data);
6672 	}
6673 }
6674 
6675 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6676 			       bool enable)
6677 {
6678 	u32 orig, data;
6679 
6680 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6681 
6682 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6683 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6684 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6685 	else
6686 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6687 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6688 
6689 	if (orig != data)
6690 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6691 }
6692 
6693 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6694 				bool enable)
6695 {
6696 	u32 orig, data;
6697 
6698 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6699 
6700 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6701 		data &= ~CLOCK_GATING_DIS;
6702 	else
6703 		data |= CLOCK_GATING_DIS;
6704 
6705 	if (orig != data)
6706 		WREG32(HDP_HOST_PATH_CNTL, data);
6707 }
6708 
6709 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6710 			      bool enable)
6711 {
6712 	u32 orig, data;
6713 
6714 	orig = data = RREG32(HDP_MEM_POWER_LS);
6715 
6716 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6717 		data |= HDP_LS_ENABLE;
6718 	else
6719 		data &= ~HDP_LS_ENABLE;
6720 
6721 	if (orig != data)
6722 		WREG32(HDP_MEM_POWER_LS, data);
6723 }
6724 
6725 void cik_update_cg(struct radeon_device *rdev,
6726 		   u32 block, bool enable)
6727 {
6728 
6729 	if (block & RADEON_CG_BLOCK_GFX) {
6730 		cik_enable_gui_idle_interrupt(rdev, false);
6731 		/* order matters! */
6732 		if (enable) {
6733 			cik_enable_mgcg(rdev, true);
6734 			cik_enable_cgcg(rdev, true);
6735 		} else {
6736 			cik_enable_cgcg(rdev, false);
6737 			cik_enable_mgcg(rdev, false);
6738 		}
6739 		cik_enable_gui_idle_interrupt(rdev, true);
6740 	}
6741 
6742 	if (block & RADEON_CG_BLOCK_MC) {
6743 		if (!(rdev->flags & RADEON_IS_IGP)) {
6744 			cik_enable_mc_mgcg(rdev, enable);
6745 			cik_enable_mc_ls(rdev, enable);
6746 		}
6747 	}
6748 
6749 	if (block & RADEON_CG_BLOCK_SDMA) {
6750 		cik_enable_sdma_mgcg(rdev, enable);
6751 		cik_enable_sdma_mgls(rdev, enable);
6752 	}
6753 
6754 	if (block & RADEON_CG_BLOCK_BIF) {
6755 		cik_enable_bif_mgls(rdev, enable);
6756 	}
6757 
6758 	if (block & RADEON_CG_BLOCK_UVD) {
6759 		if (rdev->has_uvd)
6760 			cik_enable_uvd_mgcg(rdev, enable);
6761 	}
6762 
6763 	if (block & RADEON_CG_BLOCK_HDP) {
6764 		cik_enable_hdp_mgcg(rdev, enable);
6765 		cik_enable_hdp_ls(rdev, enable);
6766 	}
6767 
6768 	if (block & RADEON_CG_BLOCK_VCE) {
6769 		vce_v2_0_enable_mgcg(rdev, enable);
6770 	}
6771 }
6772 
6773 static void cik_init_cg(struct radeon_device *rdev)
6774 {
6776 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6777 
6778 	if (rdev->has_uvd)
6779 		si_init_uvd_internal_cg(rdev);
6780 
6781 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6782 			     RADEON_CG_BLOCK_SDMA |
6783 			     RADEON_CG_BLOCK_BIF |
6784 			     RADEON_CG_BLOCK_UVD |
6785 			     RADEON_CG_BLOCK_HDP), true);
6786 }
6787 
6788 static void cik_fini_cg(struct radeon_device *rdev)
6789 {
6790 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6791 			     RADEON_CG_BLOCK_SDMA |
6792 			     RADEON_CG_BLOCK_BIF |
6793 			     RADEON_CG_BLOCK_UVD |
6794 			     RADEON_CG_BLOCK_HDP), false);
6795 
6796 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6797 }
6798 
6799 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6800 					  bool enable)
6801 {
6802 	u32 data, orig;
6803 
6804 	orig = data = RREG32(RLC_PG_CNTL);
6805 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6806 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6807 	else
6808 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6809 	if (orig != data)
6810 		WREG32(RLC_PG_CNTL, data);
6811 }
6812 
6813 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6814 					  bool enable)
6815 {
6816 	u32 data, orig;
6817 
6818 	orig = data = RREG32(RLC_PG_CNTL);
6819 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6820 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6821 	else
6822 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6823 	if (orig != data)
6824 		WREG32(RLC_PG_CNTL, data);
6825 }
6826 
6827 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6828 {
6829 	u32 data, orig;
6830 
6831 	orig = data = RREG32(RLC_PG_CNTL);
6832 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6833 		data &= ~DISABLE_CP_PG;
6834 	else
6835 		data |= DISABLE_CP_PG;
6836 	if (orig != data)
6837 		WREG32(RLC_PG_CNTL, data);
6838 }
6839 
6840 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6841 {
6842 	u32 data, orig;
6843 
6844 	orig = data = RREG32(RLC_PG_CNTL);
6845 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6846 		data &= ~DISABLE_GDS_PG;
6847 	else
6848 		data |= DISABLE_GDS_PG;
6849 	if (orig != data)
6850 		WREG32(RLC_PG_CNTL, data);
6851 }
6852 
6853 #define CP_ME_TABLE_SIZE    96
6854 #define CP_ME_TABLE_OFFSET  2048
6855 #define CP_MEC_TABLE_OFFSET 4096
6856 
6857 void cik_init_cp_pg_table(struct radeon_device *rdev)
6858 {
6859 	volatile u32 *dst_ptr;
6860 	int me, i, max_me = 4;
6861 	u32 bo_offset = 0;
6862 	u32 table_offset, table_size;
6863 
6864 	if (rdev->family == CHIP_KAVERI)
6865 		max_me = 5;
6866 
6867 	if (rdev->rlc.cp_table_ptr == NULL)
6868 		return;
6869 
6870 	/* write the cp table buffer */
6871 	dst_ptr = rdev->rlc.cp_table_ptr;
6872 	for (me = 0; me < max_me; me++) {
6873 		if (rdev->new_fw) {
6874 			const __le32 *fw_data;
6875 			const struct gfx_firmware_header_v1_0 *hdr;
6876 
6877 			if (me == 0) {
6878 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6879 				fw_data = (const __le32 *)
6880 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6881 				table_offset = le32_to_cpu(hdr->jt_offset);
6882 				table_size = le32_to_cpu(hdr->jt_size);
6883 			} else if (me == 1) {
6884 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6885 				fw_data = (const __le32 *)
6886 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6887 				table_offset = le32_to_cpu(hdr->jt_offset);
6888 				table_size = le32_to_cpu(hdr->jt_size);
6889 			} else if (me == 2) {
6890 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6891 				fw_data = (const __le32 *)
6892 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6893 				table_offset = le32_to_cpu(hdr->jt_offset);
6894 				table_size = le32_to_cpu(hdr->jt_size);
6895 			} else if (me == 3) {
6896 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6897 				fw_data = (const __le32 *)
6898 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6899 				table_offset = le32_to_cpu(hdr->jt_offset);
6900 				table_size = le32_to_cpu(hdr->jt_size);
6901 			} else {
6902 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6903 				fw_data = (const __le32 *)
6904 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6905 				table_offset = le32_to_cpu(hdr->jt_offset);
6906 				table_size = le32_to_cpu(hdr->jt_size);
6907 			}
6908 
6909 			for (i = 0; i < table_size; i++) {
6910 				dst_ptr[bo_offset + i] =
6911 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6912 			}
6913 			bo_offset += table_size;
6914 		} else {
6915 			const __be32 *fw_data;
6916 			table_size = CP_ME_TABLE_SIZE;
6917 
6918 			if (me == 0) {
6919 				fw_data = (const __be32 *)rdev->ce_fw->data;
6920 				table_offset = CP_ME_TABLE_OFFSET;
6921 			} else if (me == 1) {
6922 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6923 				table_offset = CP_ME_TABLE_OFFSET;
6924 			} else if (me == 2) {
6925 				fw_data = (const __be32 *)rdev->me_fw->data;
6926 				table_offset = CP_ME_TABLE_OFFSET;
6927 			} else {
6928 				fw_data = (const __be32 *)rdev->mec_fw->data;
6929 				table_offset = CP_MEC_TABLE_OFFSET;
6930 			}
6931 
6932 			for (i = 0; i < table_size; i++) {
6933 				dst_ptr[bo_offset + i] =
6934 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6935 			}
6936 			bo_offset += table_size;
6937 		}
6938 	}
6939 }
6940 
6941 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6942 				bool enable)
6943 {
6944 	u32 data, orig;
6945 
6946 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6947 		orig = data = RREG32(RLC_PG_CNTL);
6948 		data |= GFX_PG_ENABLE;
6949 		if (orig != data)
6950 			WREG32(RLC_PG_CNTL, data);
6951 
6952 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6953 		data |= AUTO_PG_EN;
6954 		if (orig != data)
6955 			WREG32(RLC_AUTO_PG_CTRL, data);
6956 	} else {
6957 		orig = data = RREG32(RLC_PG_CNTL);
6958 		data &= ~GFX_PG_ENABLE;
6959 		if (orig != data)
6960 			WREG32(RLC_PG_CNTL, data);
6961 
6962 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6963 		data &= ~AUTO_PG_EN;
6964 		if (orig != data)
6965 			WREG32(RLC_AUTO_PG_CTRL, data);
6966 
6967 		data = RREG32(DB_RENDER_CONTROL);
6968 	}
6969 }
6970 
6971 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6972 {
6973 	u32 mask = 0, tmp, tmp1;
6974 	int i;
6975 
6976 	mutex_lock(&rdev->grbm_idx_mutex);
6977 	cik_select_se_sh(rdev, se, sh);
6978 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6979 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6980 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6981 	mutex_unlock(&rdev->grbm_idx_mutex);
6982 
6983 	tmp &= 0xffff0000;
6984 
6985 	tmp |= tmp1;
6986 	tmp >>= 16;
6987 
6988 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6989 		mask <<= 1;
6990 		mask |= 1;
6991 	}
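	/*
	 * Illustration only: with max_cu_per_sh == 8 the loop above yields
	 * mask == 0xff, so the value returned below has one bit set for each
	 * CU in this SE/SH that the config registers read above do not mark
	 * as inactive.
	 */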
6992 
6993 	return (~tmp) & mask;
6994 }
6995 
6996 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6997 {
6998 	u32 i, j, k, active_cu_number = 0;
6999 	u32 mask, counter, cu_bitmap;
7000 	u32 tmp = 0;
7001 
7002 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
7003 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
7004 			mask = 1;
7005 			cu_bitmap = 0;
7006 			counter = 0;
7007 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
7008 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
7009 					if (counter < 2)
7010 						cu_bitmap |= mask;
7011 					counter++;
7012 				}
7013 				mask <<= 1;
7014 			}
7015 
7016 			active_cu_number += counter;
7017 			tmp |= (cu_bitmap << (i * 16 + j * 8));
7018 		}
7019 	}
7020 
7021 	WREG32(RLC_PG_AO_CU_MASK, tmp);
7022 
7023 	tmp = RREG32(RLC_MAX_PG_CU);
7024 	tmp &= ~MAX_PU_CU_MASK;
7025 	tmp |= MAX_PU_CU(active_cu_number);
7026 	WREG32(RLC_MAX_PG_CU, tmp);
7027 }
7028 
7029 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7030 				       bool enable)
7031 {
7032 	u32 data, orig;
7033 
7034 	orig = data = RREG32(RLC_PG_CNTL);
7035 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7036 		data |= STATIC_PER_CU_PG_ENABLE;
7037 	else
7038 		data &= ~STATIC_PER_CU_PG_ENABLE;
7039 	if (orig != data)
7040 		WREG32(RLC_PG_CNTL, data);
7041 }
7042 
7043 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7044 					bool enable)
7045 {
7046 	u32 data, orig;
7047 
7048 	orig = data = RREG32(RLC_PG_CNTL);
7049 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7050 		data |= DYN_PER_CU_PG_ENABLE;
7051 	else
7052 		data &= ~DYN_PER_CU_PG_ENABLE;
7053 	if (orig != data)
7054 		WREG32(RLC_PG_CNTL, data);
7055 }
7056 
7057 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7058 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7059 
7060 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
7061 {
7062 	u32 data, orig;
7063 	u32 i;
7064 
7065 	if (rdev->rlc.cs_data) {
7066 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7067 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
7068 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
7069 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
7070 	} else {
7071 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7072 		for (i = 0; i < 3; i++)
7073 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
7074 	}
7075 	if (rdev->rlc.reg_list) {
7076 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
7077 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
7078 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
7079 	}
7080 
7081 	orig = data = RREG32(RLC_PG_CNTL);
7082 	data |= GFX_PG_SRC;
7083 	if (orig != data)
7084 		WREG32(RLC_PG_CNTL, data);
7085 
7086 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
7087 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
7088 
7089 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
7090 	data &= ~IDLE_POLL_COUNT_MASK;
7091 	data |= IDLE_POLL_COUNT(0x60);
7092 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
7093 
7094 	data = 0x10101010;
7095 	WREG32(RLC_PG_DELAY, data);
7096 
7097 	data = RREG32(RLC_PG_DELAY_2);
7098 	data &= ~0xff;
7099 	data |= 0x3;
7100 	WREG32(RLC_PG_DELAY_2, data);
7101 
7102 	data = RREG32(RLC_AUTO_PG_CTRL);
7103 	data &= ~GRBM_REG_SGIT_MASK;
7104 	data |= GRBM_REG_SGIT(0x700);
7105 	WREG32(RLC_AUTO_PG_CTRL, data);
7106 
7108 
7109 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7110 {
7111 	cik_enable_gfx_cgpg(rdev, enable);
7112 	cik_enable_gfx_static_mgpg(rdev, enable);
7113 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
7114 }
7115 
7116 u32 cik_get_csb_size(struct radeon_device *rdev)
7117 {
7118 	u32 count = 0;
7119 	const struct cs_section_def *sect = NULL;
7120 	const struct cs_extent_def *ext = NULL;
7121 
7122 	if (rdev->rlc.cs_data == NULL)
7123 		return 0;
7124 
7125 	/* begin clear state */
7126 	count += 2;
7127 	/* context control state */
7128 	count += 3;
7129 
7130 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7131 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7132 			if (sect->id == SECT_CONTEXT)
7133 				count += 2 + ext->reg_count;
7134 			else
7135 				return 0;
7136 		}
7137 	}
7138 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7139 	count += 4;
7140 	/* end clear state */
7141 	count += 2;
7142 	/* clear state */
7143 	count += 2;
7144 
7145 	return count;
7146 }
7147 
7148 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7149 {
7150 	u32 count = 0, i;
7151 	const struct cs_section_def *sect = NULL;
7152 	const struct cs_extent_def *ext = NULL;
7153 
7154 	if (rdev->rlc.cs_data == NULL)
7155 		return;
7156 	if (buffer == NULL)
7157 		return;
7158 
7159 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7160 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7161 
7162 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7163 	buffer[count++] = cpu_to_le32(0x80000000);
7164 	buffer[count++] = cpu_to_le32(0x80000000);
7165 
7166 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7167 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7168 			if (sect->id == SECT_CONTEXT) {
7169 				buffer[count++] =
7170 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7171 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7172 				for (i = 0; i < ext->reg_count; i++)
7173 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7174 			} else {
7175 				return;
7176 			}
7177 		}
7178 	}
7179 
7180 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7181 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7182 	switch (rdev->family) {
7183 	case CHIP_BONAIRE:
7184 		buffer[count++] = cpu_to_le32(0x16000012);
7185 		buffer[count++] = cpu_to_le32(0x00000000);
7186 		break;
7187 	case CHIP_KAVERI:
7188 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7189 		buffer[count++] = cpu_to_le32(0x00000000);
7190 		break;
7191 	case CHIP_KABINI:
7192 	case CHIP_MULLINS:
7193 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7194 		buffer[count++] = cpu_to_le32(0x00000000);
7195 		break;
7196 	case CHIP_HAWAII:
7197 		buffer[count++] = cpu_to_le32(0x3a00161a);
7198 		buffer[count++] = cpu_to_le32(0x0000002e);
7199 		break;
7200 	default:
7201 		buffer[count++] = cpu_to_le32(0x00000000);
7202 		buffer[count++] = cpu_to_le32(0x00000000);
7203 		break;
7204 	}
7205 
7206 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7207 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7208 
7209 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7210 	buffer[count++] = cpu_to_le32(0);
7211 }
7212 
7213 static void cik_init_pg(struct radeon_device *rdev)
7214 {
7215 	if (rdev->pg_flags) {
7216 		cik_enable_sck_slowdown_on_pu(rdev, true);
7217 		cik_enable_sck_slowdown_on_pd(rdev, true);
7218 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7219 			cik_init_gfx_cgpg(rdev);
7220 			cik_enable_cp_pg(rdev, true);
7221 			cik_enable_gds_pg(rdev, true);
7222 		}
7223 		cik_init_ao_cu_mask(rdev);
7224 		cik_update_gfx_pg(rdev, true);
7225 	}
7226 }
7227 
7228 static void cik_fini_pg(struct radeon_device *rdev)
7229 {
7230 	if (rdev->pg_flags) {
7231 		cik_update_gfx_pg(rdev, false);
7232 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7233 			cik_enable_cp_pg(rdev, false);
7234 			cik_enable_gds_pg(rdev, false);
7235 		}
7236 	}
7237 }
7238 
7239 /*
7240  * Interrupts
7241  * Starting with r6xx, interrupts are handled via a ring buffer.
7242  * Ring buffers are areas of GPU accessible memory that the GPU
7243  * writes interrupt vectors into and the host reads vectors out of.
7244  * There is a rptr (read pointer) that determines where the
7245  * host is currently reading, and a wptr (write pointer)
7246  * which determines where the GPU has written.  When the
7247  * pointers are equal, the ring is idle.  When the GPU
7248  * writes vectors to the ring buffer, it increments the
7249  * wptr.  When there is an interrupt, the host then starts
7250  * fetching vectors and processing them until the pointers are
7251  * equal again, at which point it updates the rptr.
7252  */
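/*
 * Illustration only (not driver code; the helper name is made up): the
 * idle test implied by the description above is a plain pointer
 * comparison, with both pointers wrapped to the ring size:
 *
 *	static inline bool ih_ring_idle(u32 rptr, u32 wptr, u32 ptr_mask)
 *	{
 *		return (rptr & ptr_mask) == (wptr & ptr_mask);
 *	}
 *
 * cik_irq_process() below follows this scheme: it consumes vectors while
 * rptr != wptr and then writes the final rptr back to IH_RB_RPTR.
 */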
7253 
7254 /**
7255  * cik_enable_interrupts - Enable the interrupt ring buffer
7256  *
7257  * @rdev: radeon_device pointer
7258  *
7259  * Enable the interrupt ring buffer (CIK).
7260  */
7261 static void cik_enable_interrupts(struct radeon_device *rdev)
7262 {
7263 	u32 ih_cntl = RREG32(IH_CNTL);
7264 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7265 
7266 	ih_cntl |= ENABLE_INTR;
7267 	ih_rb_cntl |= IH_RB_ENABLE;
7268 	WREG32(IH_CNTL, ih_cntl);
7269 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7270 	rdev->ih.enabled = true;
7271 }
7272 
7273 /**
7274  * cik_disable_interrupts - Disable the interrupt ring buffer
7275  *
7276  * @rdev: radeon_device pointer
7277  *
7278  * Disable the interrupt ring buffer (CIK).
7279  */
7280 static void cik_disable_interrupts(struct radeon_device *rdev)
7281 {
7282 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7283 	u32 ih_cntl = RREG32(IH_CNTL);
7284 
7285 	ih_rb_cntl &= ~IH_RB_ENABLE;
7286 	ih_cntl &= ~ENABLE_INTR;
7287 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7288 	WREG32(IH_CNTL, ih_cntl);
7289 	/* set rptr, wptr to 0 */
7290 	WREG32(IH_RB_RPTR, 0);
7291 	WREG32(IH_RB_WPTR, 0);
7292 	rdev->ih.enabled = false;
7293 	rdev->ih.rptr = 0;
7294 }
7295 
7296 /**
7297  * cik_disable_interrupt_state - Disable all interrupt sources
7298  *
7299  * @rdev: radeon_device pointer
7300  *
7301  * Clear all interrupt enable bits used by the driver (CIK).
7302  */
7303 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7304 {
7305 	u32 tmp;
7306 
7307 	/* gfx ring */
7308 	tmp = RREG32(CP_INT_CNTL_RING0) &
7309 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7310 	WREG32(CP_INT_CNTL_RING0, tmp);
7311 	/* sdma */
7312 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7313 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7314 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7315 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7316 	/* compute queues */
7317 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7318 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7319 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7320 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7321 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7322 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7323 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7324 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7325 	/* grbm */
7326 	WREG32(GRBM_INT_CNTL, 0);
7327 	/* SRBM */
7328 	WREG32(SRBM_INT_CNTL, 0);
7329 	/* vline/vblank, etc. */
7330 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7331 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7332 	if (rdev->num_crtc >= 4) {
7333 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7334 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7335 	}
7336 	if (rdev->num_crtc >= 6) {
7337 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7338 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7339 	}
7340 	/* pflip */
7341 	if (rdev->num_crtc >= 2) {
7342 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7343 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7344 	}
7345 	if (rdev->num_crtc >= 4) {
7346 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7347 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7348 	}
7349 	if (rdev->num_crtc >= 6) {
7350 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7351 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7352 	}
7353 
7354 	/* dac hotplug */
7355 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7356 
7357 	/* digital hotplug */
7358 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7359 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7360 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7361 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7362 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7363 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7364 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7365 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7366 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7367 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7368 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7369 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7371 }
7372 
7373 /**
7374  * cik_irq_init - init and enable the interrupt ring
7375  *
7376  * @rdev: radeon_device pointer
7377  *
7378  * Allocate a ring buffer for the interrupt controller,
7379  * enable the RLC, disable interrupts, set up the IH
7380  * ring buffer and enable it (CIK).
7381  * Called at device load and resume.
7382  * Returns 0 for success, errors for failure.
7383  */
7384 static int cik_irq_init(struct radeon_device *rdev)
7385 {
7386 	int ret = 0;
7387 	int rb_bufsz;
7388 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7389 
7390 	/* allocate ring */
7391 	ret = r600_ih_ring_alloc(rdev);
7392 	if (ret)
7393 		return ret;
7394 
7395 	/* disable irqs */
7396 	cik_disable_interrupts(rdev);
7397 
7398 	/* init rlc */
7399 	ret = cik_rlc_resume(rdev);
7400 	if (ret) {
7401 		r600_ih_ring_fini(rdev);
7402 		return ret;
7403 	}
7404 
7405 	/* setup interrupt control */
7406 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7407 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7408 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7409 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7410 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7411 	 */
7412 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7413 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7414 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7415 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7416 
7417 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7418 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7419 
7420 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7421 		      IH_WPTR_OVERFLOW_CLEAR |
7422 		      (rb_bufsz << 1));
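	/*
	 * Illustration only: for a 64 KB IH ring, ring_size / 4 = 16384
	 * dwords and order_base_2(16384) = 14, so 14 is the ring-size value
	 * folded into IH_RB_CNTL by the shift above.
	 */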
7423 
7424 	if (rdev->wb.enabled)
7425 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7426 
7427 	/* set the writeback address whether it's enabled or not */
7428 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7429 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7430 
7431 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7432 
7433 	/* set rptr, wptr to 0 */
7434 	WREG32(IH_RB_RPTR, 0);
7435 	WREG32(IH_RB_WPTR, 0);
7436 
7437 	/* Default settings for IH_CNTL (disabled at first) */
7438 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7439 	/* RPTR_REARM only works if MSIs are enabled */
7440 	if (rdev->msi_enabled)
7441 		ih_cntl |= RPTR_REARM;
7442 	WREG32(IH_CNTL, ih_cntl);
7443 
7444 	/* force the active interrupt state to all disabled */
7445 	cik_disable_interrupt_state(rdev);
7446 
7447 	pci_set_master(rdev->pdev);
7448 
7449 	/* enable irqs */
7450 	cik_enable_interrupts(rdev);
7451 
7452 	return ret;
7453 }
7454 
7455 /**
7456  * cik_irq_set - enable/disable interrupt sources
7457  *
7458  * @rdev: radeon_device pointer
7459  *
7460  * Enable interrupt sources on the GPU (vblanks, hpd,
7461  * etc.) (CIK).
7462  * Returns 0 for success, errors for failure.
7463  */
7464 int cik_irq_set(struct radeon_device *rdev)
7465 {
7466 	u32 cp_int_cntl;
7467 	u32 cp_m1p0;
7468 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7469 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7470 	u32 grbm_int_cntl = 0;
7471 	u32 dma_cntl, dma_cntl1;
7472 
7473 	if (!rdev->irq.installed) {
7474 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7475 		return -EINVAL;
7476 	}
7477 	/* don't enable anything if the ih is disabled */
7478 	if (!rdev->ih.enabled) {
7479 		cik_disable_interrupts(rdev);
7480 		/* force the active interrupt state to all disabled */
7481 		cik_disable_interrupt_state(rdev);
7482 		return 0;
7483 	}
7484 
7485 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7486 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7487 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7488 
7489 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7490 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7491 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7492 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7493 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7494 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7495 
7496 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7497 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7498 
7499 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7500 
7501 	/* enable CP interrupts on all rings */
7502 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7503 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7504 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7505 	}
7506 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7507 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7508 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7509 		if (ring->me == 1) {
7510 			switch (ring->pipe) {
7511 			case 0:
7512 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7513 				break;
7514 			default:
7515 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7516 				break;
7517 			}
7518 		} else {
7519 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7520 		}
7521 	}
7522 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7523 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7524 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7525 		if (ring->me == 1) {
7526 			switch (ring->pipe) {
7527 			case 0:
7528 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7529 				break;
7530 			default:
7531 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7532 				break;
7533 			}
7534 		} else {
7535 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7536 		}
7537 	}
7538 
7539 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7540 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7541 		dma_cntl |= TRAP_ENABLE;
7542 	}
7543 
7544 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7545 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7546 		dma_cntl1 |= TRAP_ENABLE;
7547 	}
7548 
7549 	if (rdev->irq.crtc_vblank_int[0] ||
7550 	    atomic_read(&rdev->irq.pflip[0])) {
7551 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7552 		crtc1 |= VBLANK_INTERRUPT_MASK;
7553 	}
7554 	if (rdev->irq.crtc_vblank_int[1] ||
7555 	    atomic_read(&rdev->irq.pflip[1])) {
7556 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7557 		crtc2 |= VBLANK_INTERRUPT_MASK;
7558 	}
7559 	if (rdev->irq.crtc_vblank_int[2] ||
7560 	    atomic_read(&rdev->irq.pflip[2])) {
7561 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7562 		crtc3 |= VBLANK_INTERRUPT_MASK;
7563 	}
7564 	if (rdev->irq.crtc_vblank_int[3] ||
7565 	    atomic_read(&rdev->irq.pflip[3])) {
7566 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7567 		crtc4 |= VBLANK_INTERRUPT_MASK;
7568 	}
7569 	if (rdev->irq.crtc_vblank_int[4] ||
7570 	    atomic_read(&rdev->irq.pflip[4])) {
7571 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7572 		crtc5 |= VBLANK_INTERRUPT_MASK;
7573 	}
7574 	if (rdev->irq.crtc_vblank_int[5] ||
7575 	    atomic_read(&rdev->irq.pflip[5])) {
7576 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7577 		crtc6 |= VBLANK_INTERRUPT_MASK;
7578 	}
7579 	if (rdev->irq.hpd[0]) {
7580 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7581 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7582 	}
7583 	if (rdev->irq.hpd[1]) {
7584 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7585 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7586 	}
7587 	if (rdev->irq.hpd[2]) {
7588 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7589 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7590 	}
7591 	if (rdev->irq.hpd[3]) {
7592 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7593 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7594 	}
7595 	if (rdev->irq.hpd[4]) {
7596 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7597 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7598 	}
7599 	if (rdev->irq.hpd[5]) {
7600 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7601 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7602 	}
7603 
7604 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7605 
7606 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7607 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7608 
7609 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7610 
7611 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7612 
7613 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7614 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7615 	if (rdev->num_crtc >= 4) {
7616 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7617 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7618 	}
7619 	if (rdev->num_crtc >= 6) {
7620 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7621 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7622 	}
7623 
7624 	if (rdev->num_crtc >= 2) {
7625 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7626 		       GRPH_PFLIP_INT_MASK);
7627 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7628 		       GRPH_PFLIP_INT_MASK);
7629 	}
7630 	if (rdev->num_crtc >= 4) {
7631 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7632 		       GRPH_PFLIP_INT_MASK);
7633 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7634 		       GRPH_PFLIP_INT_MASK);
7635 	}
7636 	if (rdev->num_crtc >= 6) {
7637 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7638 		       GRPH_PFLIP_INT_MASK);
7639 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7640 		       GRPH_PFLIP_INT_MASK);
7641 	}
7642 
7643 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7644 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7645 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7646 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7647 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7648 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7649 
7650 	/* posting read */
7651 	RREG32(SRBM_STATUS);
7652 
7653 	return 0;
7654 }
7655 
7656 /**
7657  * cik_irq_ack - ack interrupt sources
7658  *
7659  * @rdev: radeon_device pointer
7660  *
7661  * Ack interrupt sources on the GPU (vblanks, hpd,
7662  * etc.) (CIK).  Certain interrupt sources are sw
7663  * generated and do not require an explicit ack.
7664  */
7665 static inline void cik_irq_ack(struct radeon_device *rdev)
7666 {
7667 	u32 tmp;
7668 
7669 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7670 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7671 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7672 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7673 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7674 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7675 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7676 
7677 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7678 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7679 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7680 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7681 	if (rdev->num_crtc >= 4) {
7682 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7683 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7684 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7685 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7686 	}
7687 	if (rdev->num_crtc >= 6) {
7688 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7689 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7690 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7691 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7692 	}
7693 
7694 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7695 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7696 		       GRPH_PFLIP_INT_CLEAR);
7697 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7698 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7699 		       GRPH_PFLIP_INT_CLEAR);
7700 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7701 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7702 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7703 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7704 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7705 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7706 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7707 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7708 
7709 	if (rdev->num_crtc >= 4) {
7710 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7711 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7712 			       GRPH_PFLIP_INT_CLEAR);
7713 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7714 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7715 			       GRPH_PFLIP_INT_CLEAR);
7716 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7717 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7718 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7719 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7720 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7721 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7722 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7723 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7724 	}
7725 
7726 	if (rdev->num_crtc >= 6) {
7727 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7728 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7729 			       GRPH_PFLIP_INT_CLEAR);
7730 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7731 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7732 			       GRPH_PFLIP_INT_CLEAR);
7733 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7734 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7735 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7736 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7737 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7738 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7739 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7740 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7741 	}
7742 
7743 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7744 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7745 		tmp |= DC_HPDx_INT_ACK;
7746 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7747 	}
7748 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7749 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7750 		tmp |= DC_HPDx_INT_ACK;
7751 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7752 	}
7753 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7754 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7755 		tmp |= DC_HPDx_INT_ACK;
7756 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7757 	}
7758 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7759 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7760 		tmp |= DC_HPDx_INT_ACK;
7761 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7762 	}
7763 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7764 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7765 		tmp |= DC_HPDx_INT_ACK;
7766 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7767 	}
7768 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7769 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7770 		tmp |= DC_HPDx_INT_ACK;
7771 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7772 	}
7773 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7774 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7775 		tmp |= DC_HPDx_RX_INT_ACK;
7776 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7777 	}
7778 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7779 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7780 		tmp |= DC_HPDx_RX_INT_ACK;
7781 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7782 	}
7783 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7784 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7785 		tmp |= DC_HPDx_RX_INT_ACK;
7786 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7787 	}
7788 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7789 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7790 		tmp |= DC_HPDx_RX_INT_ACK;
7791 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7792 	}
7793 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7794 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7795 		tmp |= DC_HPDx_RX_INT_ACK;
7796 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7797 	}
7798 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7799 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7800 		tmp |= DC_HPDx_RX_INT_ACK;
7801 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7802 	}
7803 }
7804 
7805 /**
7806  * cik_irq_disable - disable interrupts
7807  *
7808  * @rdev: radeon_device pointer
7809  *
7810  * Disable interrupts on the hw (CIK).
7811  */
7812 static void cik_irq_disable(struct radeon_device *rdev)
7813 {
7814 	cik_disable_interrupts(rdev);
7815 	/* Wait and acknowledge irq */
7816 	mdelay(1);
7817 	cik_irq_ack(rdev);
7818 	cik_disable_interrupt_state(rdev);
7819 }
7820 
7821 /**
7822  * cik_irq_suspend - disable interrupts for suspend
7823  *
7824  * @rdev: radeon_device pointer
7825  *
7826  * Disable interrupts and stop the RLC (CIK).
7827  * Used for suspend.
7828  */
7829 static void cik_irq_suspend(struct radeon_device *rdev)
7830 {
7831 	cik_irq_disable(rdev);
7832 	cik_rlc_stop(rdev);
7833 }
7834 
7835 /**
7836  * cik_irq_fini - tear down interrupt support
7837  *
7838  * @rdev: radeon_device pointer
7839  *
7840  * Disable interrupts on the hw and free the IH ring
7841  * buffer (CIK).
7842  * Used for driver unload.
7843  */
7844 static void cik_irq_fini(struct radeon_device *rdev)
7845 {
7846 	cik_irq_suspend(rdev);
7847 	r600_ih_ring_fini(rdev);
7848 }
7849 
7850 /**
7851  * cik_get_ih_wptr - get the IH ring buffer wptr
7852  *
7853  * @rdev: radeon_device pointer
7854  *
7855  * Get the IH ring buffer wptr from either the register
7856  * or the writeback memory buffer (CIK).  Also check for
7857  * ring buffer overflow and deal with it.
7858  * Used by cik_irq_process().
7859  * Returns the value of the wptr.
7860  */
7861 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7862 {
7863 	u32 wptr, tmp;
7864 
7865 	if (rdev->wb.enabled)
7866 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7867 	else
7868 		wptr = RREG32(IH_RB_WPTR);
7869 
7870 	if (wptr & RB_OVERFLOW) {
7871 		wptr &= ~RB_OVERFLOW;
7872 		/* When a ring buffer overflow happens, start parsing interrupts
7873 		 * from the last vector that was not overwritten (wptr + 16).
7874 		 * Hopefully this allows us to catch up.
7875 		 */
7876 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7877 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7878 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7879 		tmp = RREG32(IH_RB_CNTL);
7880 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7881 		WREG32(IH_RB_CNTL, tmp);
7882 	}
7883 	return (wptr & rdev->ih.ptr_mask);
7884 }
7885 
7886 /* CIK IV Ring
7887  * Each IV ring entry is 128 bits:
7888  * [7:0]    - interrupt source id
7889  * [31:8]   - reserved
7890  * [59:32]  - interrupt source data
7891  * [63:60]  - reserved
7892  * [71:64]  - RINGID
7893  *            CP:
7894  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7895  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7896  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7897  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7898  *            PIPE_ID - ME0 0=3D
7899  *                    - ME1&2 compute dispatcher (4 pipes each)
7900  *            SDMA:
7901  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7902  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7903  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7904  * [79:72]  - VMID
7905  * [95:80]  - PASID
7906  * [127:96] - reserved
7907  */
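/*
 * Illustration only (not driver code): given the layout above, the fields
 * the driver consumes live in the first three dwords of each 16-byte
 * entry and decode as follows (mirroring the masks applied in
 * cik_irq_process() below):
 *
 *	src_id   = le32_to_cpu(ring[i + 0]) & 0xff;        bits [7:0]
 *	src_data = le32_to_cpu(ring[i + 1]) & 0xfffffff;   bits [59:32]
 *	ring_id  = le32_to_cpu(ring[i + 2]) & 0xff;        bits [71:64]
 */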
7908 /**
7909  * cik_irq_process - interrupt handler
7910  *
7911  * @rdev: radeon_device pointer
7912  *
7913  * Interrupt handler (CIK).  Walk the IH ring,
7914  * ack interrupts and schedule work to handle
7915  * interrupt events.
7916  * Returns irq process return code.
7917  */
7918 int cik_irq_process(struct radeon_device *rdev)
7919 {
7920 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7921 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7922 	u32 wptr;
7923 	u32 rptr;
7924 	u32 src_id, src_data, ring_id;
7925 	u8 me_id, pipe_id, queue_id;
7926 	u32 ring_index;
7927 	bool queue_hotplug = false;
7928 	bool queue_dp = false;
7929 	bool queue_reset = false;
7930 	u32 addr, status, mc_client;
7931 	bool queue_thermal = false;
7932 
7933 	if (!rdev->ih.enabled || rdev->shutdown)
7934 		return IRQ_NONE;
7935 
7936 	wptr = cik_get_ih_wptr(rdev);
7937 
7938 restart_ih:
7939 	/* is somebody else already processing irqs? */
7940 	if (atomic_xchg(&rdev->ih.lock, 1))
7941 		return IRQ_NONE;
7942 
7943 	rptr = rdev->ih.rptr;
7944 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7945 
7946 	/* Order reading of wptr vs. reading of IH ring data */
7947 	rmb();
7948 
7949 	/* display interrupts */
7950 	cik_irq_ack(rdev);
7951 
7952 	while (rptr != wptr) {
7953 		/* wptr/rptr are in bytes! */
7954 		ring_index = rptr / 4;
7955 
7956 		radeon_kfd_interrupt(rdev,
7957 				(const void *) &rdev->ih.ring[ring_index]);
7958 
7959 		src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7960 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7961 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7962 
7963 		switch (src_id) {
7964 		case 1: /* D1 vblank/vline */
7965 			switch (src_data) {
7966 			case 0: /* D1 vblank */
7967 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7968 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7969 
7970 				if (rdev->irq.crtc_vblank_int[0]) {
7971 					drm_handle_vblank(rdev->ddev, 0);
7972 					rdev->pm.vblank_sync = true;
7973 					wake_up(&rdev->irq.vblank_queue);
7974 				}
7975 				if (atomic_read(&rdev->irq.pflip[0]))
7976 					radeon_crtc_handle_vblank(rdev, 0);
7977 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7978 				DRM_DEBUG("IH: D1 vblank\n");
7979 
7980 				break;
7981 			case 1: /* D1 vline */
7982 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7983 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7984 
7985 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7986 				DRM_DEBUG("IH: D1 vline\n");
7987 
7988 				break;
7989 			default:
7990 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7991 				break;
7992 			}
7993 			break;
7994 		case 2: /* D2 vblank/vline */
7995 			switch (src_data) {
7996 			case 0: /* D2 vblank */
7997 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7998 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7999 
8000 				if (rdev->irq.crtc_vblank_int[1]) {
8001 					drm_handle_vblank(rdev->ddev, 1);
8002 					rdev->pm.vblank_sync = true;
8003 					wake_up(&rdev->irq.vblank_queue);
8004 				}
8005 				if (atomic_read(&rdev->irq.pflip[1]))
8006 					radeon_crtc_handle_vblank(rdev, 1);
8007 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
8008 				DRM_DEBUG("IH: D2 vblank\n");
8009 
8010 				break;
8011 			case 1: /* D2 vline */
8012 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
8013 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8014 
8015 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
8016 				DRM_DEBUG("IH: D2 vline\n");
8017 
8018 				break;
8019 			default:
8020 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8021 				break;
8022 			}
8023 			break;
8024 		case 3: /* D3 vblank/vline */
8025 			switch (src_data) {
8026 			case 0: /* D3 vblank */
8027 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
8028 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8029 
8030 				if (rdev->irq.crtc_vblank_int[2]) {
8031 					drm_handle_vblank(rdev->ddev, 2);
8032 					rdev->pm.vblank_sync = true;
8033 					wake_up(&rdev->irq.vblank_queue);
8034 				}
8035 				if (atomic_read(&rdev->irq.pflip[2]))
8036 					radeon_crtc_handle_vblank(rdev, 2);
8037 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8038 				DRM_DEBUG("IH: D3 vblank\n");
8039 
8040 				break;
8041 			case 1: /* D3 vline */
8042 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8043 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8044 
8045 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8046 				DRM_DEBUG("IH: D3 vline\n");
8047 
8048 				break;
8049 			default:
8050 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8051 				break;
8052 			}
8053 			break;
8054 		case 4: /* D4 vblank/vline */
8055 			switch (src_data) {
8056 			case 0: /* D4 vblank */
8057 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8058 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8059 
8060 				if (rdev->irq.crtc_vblank_int[3]) {
8061 					drm_handle_vblank(rdev->ddev, 3);
8062 					rdev->pm.vblank_sync = true;
8063 					wake_up(&rdev->irq.vblank_queue);
8064 				}
8065 				if (atomic_read(&rdev->irq.pflip[3]))
8066 					radeon_crtc_handle_vblank(rdev, 3);
8067 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8068 				DRM_DEBUG("IH: D4 vblank\n");
8069 
8070 				break;
8071 			case 1: /* D4 vline */
8072 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8073 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8074 
8075 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8076 				DRM_DEBUG("IH: D4 vline\n");
8077 
8078 				break;
8079 			default:
8080 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8081 				break;
8082 			}
8083 			break;
8084 		case 5: /* D5 vblank/vline */
8085 			switch (src_data) {
8086 			case 0: /* D5 vblank */
8087 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8088 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8089 
8090 				if (rdev->irq.crtc_vblank_int[4]) {
8091 					drm_handle_vblank(rdev->ddev, 4);
8092 					rdev->pm.vblank_sync = true;
8093 					wake_up(&rdev->irq.vblank_queue);
8094 				}
8095 				if (atomic_read(&rdev->irq.pflip[4]))
8096 					radeon_crtc_handle_vblank(rdev, 4);
8097 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8098 				DRM_DEBUG("IH: D5 vblank\n");
8099 
8100 				break;
8101 			case 1: /* D5 vline */
8102 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8103 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8104 
8105 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8106 				DRM_DEBUG("IH: D5 vline\n");
8107 
8108 				break;
8109 			default:
8110 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8111 				break;
8112 			}
8113 			break;
8114 		case 6: /* D6 vblank/vline */
8115 			switch (src_data) {
8116 			case 0: /* D6 vblank */
8117 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8118 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8119 
8120 				if (rdev->irq.crtc_vblank_int[5]) {
8121 					drm_handle_vblank(rdev->ddev, 5);
8122 					rdev->pm.vblank_sync = true;
8123 					wake_up(&rdev->irq.vblank_queue);
8124 				}
8125 				if (atomic_read(&rdev->irq.pflip[5]))
8126 					radeon_crtc_handle_vblank(rdev, 5);
8127 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8128 				DRM_DEBUG("IH: D6 vblank\n");
8129 
8130 				break;
8131 			case 1: /* D6 vline */
8132 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8133 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8134 
8135 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8136 				DRM_DEBUG("IH: D6 vline\n");
8137 
8138 				break;
8139 			default:
8140 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8141 				break;
8142 			}
8143 			break;
8144 		case 8: /* D1 page flip */
8145 		case 10: /* D2 page flip */
8146 		case 12: /* D3 page flip */
8147 		case 14: /* D4 page flip */
8148 		case 16: /* D5 page flip */
8149 		case 18: /* D6 page flip */
8150 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8151 			if (radeon_use_pflipirq > 0)
8152 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8153 			break;
8154 		case 42: /* HPD hotplug */
8155 			switch (src_data) {
8156 			case 0:
8157 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8158 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8159 
8160 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8161 				queue_hotplug = true;
8162 				DRM_DEBUG("IH: HPD1\n");
8163 
8164 				break;
8165 			case 1:
8166 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8167 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8168 
8169 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8170 				queue_hotplug = true;
8171 				DRM_DEBUG("IH: HPD2\n");
8172 
8173 				break;
8174 			case 2:
8175 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8176 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8177 
8178 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8179 				queue_hotplug = true;
8180 				DRM_DEBUG("IH: HPD3\n");
8181 
8182 				break;
8183 			case 3:
8184 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8185 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8186 
8187 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8188 				queue_hotplug = true;
8189 				DRM_DEBUG("IH: HPD4\n");
8190 
8191 				break;
8192 			case 4:
8193 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8194 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8195 
8196 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8197 				queue_hotplug = true;
8198 				DRM_DEBUG("IH: HPD5\n");
8199 
8200 				break;
8201 			case 5:
8202 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8203 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8204 
8205 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8206 				queue_hotplug = true;
8207 				DRM_DEBUG("IH: HPD6\n");
8208 
8209 				break;
8210 			case 6:
8211 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8212 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8213 
8214 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8215 				queue_dp = true;
8216 				DRM_DEBUG("IH: HPD_RX 1\n");
8217 
8218 				break;
8219 			case 7:
8220 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8221 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8222 
8223 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8224 				queue_dp = true;
8225 				DRM_DEBUG("IH: HPD_RX 2\n");
8226 
8227 				break;
8228 			case 8:
8229 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8230 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8231 
8232 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8233 				queue_dp = true;
8234 				DRM_DEBUG("IH: HPD_RX 3\n");
8235 
8236 				break;
8237 			case 9:
8238 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8239 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8240 
8241 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8242 				queue_dp = true;
8243 				DRM_DEBUG("IH: HPD_RX 4\n");
8244 
8245 				break;
8246 			case 10:
8247 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8248 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8249 
8250 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8251 				queue_dp = true;
8252 				DRM_DEBUG("IH: HPD_RX 5\n");
8253 
8254 				break;
8255 			case 11:
8256 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8257 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8258 
8259 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8260 				queue_dp = true;
8261 				DRM_DEBUG("IH: HPD_RX 6\n");
8262 
8263 				break;
8264 			default:
8265 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8266 				break;
8267 			}
8268 			break;
8269 		case 96:
8270 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8271 			WREG32(SRBM_INT_ACK, 0x1);
8272 			break;
8273 		case 124: /* UVD */
8274 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8275 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8276 			break;
8277 		case 146:
8278 		case 147:
8279 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8280 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8281 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8282 			/* reset addr and status */
8283 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8284 			if (addr == 0x0 && status == 0x0)
8285 				break;
8286 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8287 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8288 				addr);
8289 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8290 				status);
8291 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8292 			break;
8293 		case 167: /* VCE */
8294 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8295 			switch (src_data) {
8296 			case 0:
8297 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8298 				break;
8299 			case 1:
8300 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8301 				break;
8302 			default:
8303 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8304 				break;
8305 			}
8306 			break;
8307 		case 176: /* GFX RB CP_INT */
8308 		case 177: /* GFX IB CP_INT */
8309 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8310 			break;
8311 		case 181: /* CP EOP event */
8312 			DRM_DEBUG("IH: CP EOP\n");
8313 			/* XXX check the bitfield order! */
8314 			me_id = (ring_id & 0x60) >> 5;
8315 			pipe_id = (ring_id & 0x18) >> 3;
8316 			queue_id = (ring_id & 0x7) >> 0;
8317 			switch (me_id) {
8318 			case 0:
8319 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8320 				break;
8321 			case 1:
8322 			case 2:
8323 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8324 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8325 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8326 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8327 				break;
8328 			}
8329 			break;
8330 		case 184: /* CP Privileged reg access */
8331 			DRM_ERROR("Illegal register access in command stream\n");
8332 			/* XXX check the bitfield order! */
8333 			me_id = (ring_id & 0x60) >> 5;
8334 			pipe_id = (ring_id & 0x18) >> 3;
8335 			queue_id = (ring_id & 0x7) >> 0;
8336 			switch (me_id) {
8337 			case 0:
8338 				/* This results in a full GPU reset, but all we need to do is soft
8339 				 * reset the CP for gfx
8340 				 */
8341 				queue_reset = true;
8342 				break;
8343 			case 1:
8344 				/* XXX compute */
8345 				queue_reset = true;
8346 				break;
8347 			case 2:
8348 				/* XXX compute */
8349 				queue_reset = true;
8350 				break;
8351 			}
8352 			break;
8353 		case 185: /* CP Privileged inst */
8354 			DRM_ERROR("Illegal instruction in command stream\n");
8355 			/* XXX check the bitfield order! */
8356 			me_id = (ring_id & 0x60) >> 5;
8357 			pipe_id = (ring_id & 0x18) >> 3;
8358 			queue_id = (ring_id & 0x7) >> 0;
8359 			switch (me_id) {
8360 			case 0:
8361 				/* This results in a full GPU reset, but all we need to do is soft
8362 				 * reset the CP for gfx
8363 				 */
8364 				queue_reset = true;
8365 				break;
8366 			case 1:
8367 				/* XXX compute */
8368 				queue_reset = true;
8369 				break;
8370 			case 2:
8371 				/* XXX compute */
8372 				queue_reset = true;
8373 				break;
8374 			}
8375 			break;
8376 		case 224: /* SDMA trap event */
8377 			/* XXX check the bitfield order! */
8378 			me_id = (ring_id & 0x3) >> 0;
8379 			queue_id = (ring_id & 0xc) >> 2;
8380 			DRM_DEBUG("IH: SDMA trap\n");
8381 			switch (me_id) {
8382 			case 0:
8383 				switch (queue_id) {
8384 				case 0:
8385 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8386 					break;
8387 				case 1:
8388 					/* XXX compute */
8389 					break;
8390 				case 2:
8391 					/* XXX compute */
8392 					break;
8393 				}
8394 				break;
8395 			case 1:
8396 				switch (queue_id) {
8397 				case 0:
8398 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8399 					break;
8400 				case 1:
8401 					/* XXX compute */
8402 					break;
8403 				case 2:
8404 					/* XXX compute */
8405 					break;
8406 				}
8407 				break;
8408 			}
8409 			break;
8410 		case 230: /* thermal low to high */
8411 			DRM_DEBUG("IH: thermal low to high\n");
8412 			rdev->pm.dpm.thermal.high_to_low = false;
8413 			queue_thermal = true;
8414 			break;
8415 		case 231: /* thermal high to low */
8416 			DRM_DEBUG("IH: thermal high to low\n");
8417 			rdev->pm.dpm.thermal.high_to_low = true;
8418 			queue_thermal = true;
8419 			break;
8420 		case 233: /* GUI IDLE */
8421 			DRM_DEBUG("IH: GUI idle\n");
8422 			break;
8423 		case 241: /* SDMA Privileged inst */
8424 		case 247: /* SDMA Privileged inst */
8425 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8426 			/* XXX check the bitfield order! */
8427 			me_id = (ring_id & 0x3) >> 0;
8428 			queue_id = (ring_id & 0xc) >> 2;
8429 			switch (me_id) {
8430 			case 0:
8431 				switch (queue_id) {
8432 				case 0:
8433 					queue_reset = true;
8434 					break;
8435 				case 1:
8436 					/* XXX compute */
8437 					queue_reset = true;
8438 					break;
8439 				case 2:
8440 					/* XXX compute */
8441 					queue_reset = true;
8442 					break;
8443 				}
8444 				break;
8445 			case 1:
8446 				switch (queue_id) {
8447 				case 0:
8448 					queue_reset = true;
8449 					break;
8450 				case 1:
8451 					/* XXX compute */
8452 					queue_reset = true;
8453 					break;
8454 				case 2:
8455 					/* XXX compute */
8456 					queue_reset = true;
8457 					break;
8458 				}
8459 				break;
8460 			}
8461 			break;
8462 		default:
8463 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8464 			break;
8465 		}
8466 
8467 		/* wptr/rptr are in bytes! */
8468 		rptr += 16;
8469 		rptr &= rdev->ih.ptr_mask;
8470 		WREG32(IH_RB_RPTR, rptr);
8471 	}
8472 	if (queue_dp)
8473 		schedule_work(&rdev->dp_work);
8474 	if (queue_hotplug)
8475 		schedule_work(&rdev->hotplug_work);
8476 	if (queue_reset) {
8477 		rdev->needs_reset = true;
8478 		wake_up_all(&rdev->fence_queue);
8479 	}
8480 	if (queue_thermal)
8481 		schedule_work(&rdev->pm.dpm.thermal.work);
8482 	rdev->ih.rptr = rptr;
8483 	atomic_set(&rdev->ih.lock, 0);
8484 
8485 	/* make sure wptr hasn't changed while processing */
8486 	wptr = cik_get_ih_wptr(rdev);
8487 	if (wptr != rptr)
8488 		goto restart_ih;
8489 
8490 	return IRQ_HANDLED;
8491 }
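
/*
 * Illustrative helper, not used by the driver: the IH ring walk in
 * cik_irq_process() relies on the ring size being a power of two, so
 * advancing the read pointer past one 16 byte IH vector and wrapping
 * around reduces to a single mask operation.
 */
static inline u32 cik_ih_advance_rptr(u32 rptr, u32 ptr_mask)
{
	return (rptr + 16) & ptr_mask; /* each IH ring entry is 16 bytes */
}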
8492 
8493 /*
8494  * startup/shutdown callbacks
8495  */
8496 /**
8497  * cik_startup - program the asic to a functional state
8498  *
8499  * @rdev: radeon_device pointer
8500  *
8501  * Programs the asic to a functional state (CIK).
8502  * Called by cik_init() and cik_resume().
8503  * Returns 0 for success, error for failure.
8504  */
8505 static int cik_startup(struct radeon_device *rdev)
8506 {
8507 	struct radeon_ring *ring;
8508 	u32 nop;
8509 	int r;
8510 
8511 	/* enable pcie gen2/3 link */
8512 	cik_pcie_gen3_enable(rdev);
8513 	/* enable aspm */
8514 	cik_program_aspm(rdev);
8515 
8516 	/* scratch needs to be initialized before MC */
8517 	r = r600_vram_scratch_init(rdev);
8518 	if (r)
8519 		return r;
8520 
8521 	cik_mc_program(rdev);
8522 
8523 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8524 		r = ci_mc_load_microcode(rdev);
8525 		if (r) {
8526 			DRM_ERROR("Failed to load MC firmware!\n");
8527 			return r;
8528 		}
8529 	}
8530 
8531 	r = cik_pcie_gart_enable(rdev);
8532 	if (r)
8533 		return r;
8534 	cik_gpu_init(rdev);
8535 
8536 	/* allocate rlc buffers */
8537 	if (rdev->flags & RADEON_IS_IGP) {
8538 		if (rdev->family == CHIP_KAVERI) {
8539 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8540 			rdev->rlc.reg_list_size =
8541 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8542 		} else {
8543 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8544 			rdev->rlc.reg_list_size =
8545 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8546 		}
8547 	}
8548 	rdev->rlc.cs_data = ci_cs_data;
8549 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8550 	r = sumo_rlc_init(rdev);
8551 	if (r) {
8552 		DRM_ERROR("Failed to init rlc BOs!\n");
8553 		return r;
8554 	}
8555 
8556 	/* allocate wb buffer */
8557 	r = radeon_wb_init(rdev);
8558 	if (r)
8559 		return r;
8560 
8561 	/* allocate mec buffers */
8562 	r = cik_mec_init(rdev);
8563 	if (r) {
8564 		DRM_ERROR("Failed to init MEC BOs!\n");
8565 		return r;
8566 	}
8567 
8568 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8569 	if (r) {
8570 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8571 		return r;
8572 	}
8573 
8574 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8575 	if (r) {
8576 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8577 		return r;
8578 	}
8579 
8580 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8581 	if (r) {
8582 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8583 		return r;
8584 	}
8585 
8586 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8587 	if (r) {
8588 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8589 		return r;
8590 	}
8591 
8592 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8593 	if (r) {
8594 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8595 		return r;
8596 	}
8597 
8598 	r = radeon_uvd_resume(rdev);
8599 	if (!r) {
8600 		r = uvd_v4_2_resume(rdev);
8601 		if (!r) {
8602 			r = radeon_fence_driver_start_ring(rdev,
8603 							   R600_RING_TYPE_UVD_INDEX);
8604 			if (r)
8605 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8606 		}
8607 	}
8608 	if (r)
8609 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8610 
8611 	r = radeon_vce_resume(rdev);
8612 	if (!r) {
8613 		r = vce_v2_0_resume(rdev);
8614 		if (!r)
8615 			r = radeon_fence_driver_start_ring(rdev,
8616 							   TN_RING_TYPE_VCE1_INDEX);
8617 		if (!r)
8618 			r = radeon_fence_driver_start_ring(rdev,
8619 							   TN_RING_TYPE_VCE2_INDEX);
8620 	}
8621 	if (r) {
8622 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8623 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8624 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8625 	}
8626 
8627 	/* Enable IRQ */
8628 	if (!rdev->irq.installed) {
8629 		r = radeon_irq_kms_init(rdev);
8630 		if (r)
8631 			return r;
8632 	}
8633 
8634 	r = cik_irq_init(rdev);
8635 	if (r) {
8636 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8637 		radeon_irq_kms_fini(rdev);
8638 		return r;
8639 	}
8640 	cik_irq_set(rdev);
8641 
8642 	if (rdev->family == CHIP_HAWAII) {
8643 		if (rdev->new_fw)
8644 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8645 		else
8646 			nop = RADEON_CP_PACKET2;
8647 	} else {
8648 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8649 	}
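	/*
	 * A type-3 NOP with the maximum count field (0x3FFF) is treated
	 * by the CP as a single-dword filler; older Hawaii microcode is
	 * only known to cope with type-2 (PACKET2) nops, hence the
	 * fallback above.
	 */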
8650 
8651 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8652 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8653 			     nop);
8654 	if (r)
8655 		return r;
8656 
8657 	/* set up the compute queues */
8658 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8659 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8660 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8661 			     nop);
8662 	if (r)
8663 		return r;
8664 	ring->me = 1; /* first MEC */
8665 	ring->pipe = 0; /* first pipe */
8666 	ring->queue = 0; /* first queue */
8667 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8668 
8669 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8670 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8671 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8672 			     nop);
8673 	if (r)
8674 		return r;
8675 	/* dGPUs only have 1 MEC */
8676 	ring->me = 1; /* first MEC */
8677 	ring->pipe = 0; /* first pipe */
8678 	ring->queue = 1; /* second queue */
8679 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8680 
8681 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8682 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8683 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8684 	if (r)
8685 		return r;
8686 
8687 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8688 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8689 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8690 	if (r)
8691 		return r;
8692 
8693 	r = cik_cp_resume(rdev);
8694 	if (r)
8695 		return r;
8696 
8697 	r = cik_sdma_resume(rdev);
8698 	if (r)
8699 		return r;
8700 
8701 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8702 	if (ring->ring_size) {
8703 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8704 				     RADEON_CP_PACKET2);
8705 		if (!r)
8706 			r = uvd_v1_0_init(rdev);
8707 		if (r)
8708 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8709 	}
8710 
8711 	r = -ENOENT;
8712 
8713 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8714 	if (ring->ring_size)
8715 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8716 				     VCE_CMD_NO_OP);
8717 
8718 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8719 	if (ring->ring_size)
8720 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8721 				     VCE_CMD_NO_OP);
8722 
8723 	if (!r)
8724 		r = vce_v1_0_init(rdev);
8725 	else if (r != -ENOENT)
8726 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8727 
8728 	r = radeon_ib_pool_init(rdev);
8729 	if (r) {
8730 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8731 		return r;
8732 	}
8733 
8734 	r = radeon_vm_manager_init(rdev);
8735 	if (r) {
8736 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8737 		return r;
8738 	}
8739 
8740 	r = radeon_audio_init(rdev);
8741 	if (r)
8742 		return r;
8743 
8744 	r = radeon_kfd_resume(rdev);
8745 	if (r)
8746 		return r;
8747 
8748 	return 0;
8749 }
8750 
8751 /**
8752  * cik_resume - resume the asic to a functional state
8753  *
8754  * @rdev: radeon_device pointer
8755  *
8756  * Programs the asic to a functional state (CIK).
8757  * Called at resume.
8758  * Returns 0 for success, error for failure.
8759  */
8760 int cik_resume(struct radeon_device *rdev)
8761 {
8762 	int r;
8763 
8764 	/* post card */
8765 	atom_asic_init(rdev->mode_info.atom_context);
8766 
8767 	/* init golden registers */
8768 	cik_init_golden_registers(rdev);
8769 
8770 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8771 		radeon_pm_resume(rdev);
8772 
8773 	rdev->accel_working = true;
8774 	r = cik_startup(rdev);
8775 	if (r) {
8776 		DRM_ERROR("cik startup failed on resume\n");
8777 		rdev->accel_working = false;
8778 		return r;
8779 	}
8780 
8781 	return r;
8782 
8783 }
8784 
8785 /**
8786  * cik_suspend - suspend the asic
8787  *
8788  * @rdev: radeon_device pointer
8789  *
8790  * Bring the chip into a state suitable for suspend (CIK).
8791  * Called at suspend.
8792  * Returns 0 for success.
8793  */
8794 int cik_suspend(struct radeon_device *rdev)
8795 {
8796 	radeon_kfd_suspend(rdev);
8797 	radeon_pm_suspend(rdev);
8798 	radeon_audio_fini(rdev);
8799 	radeon_vm_manager_fini(rdev);
8800 	cik_cp_enable(rdev, false);
8801 	cik_sdma_enable(rdev, false);
8802 	uvd_v1_0_fini(rdev);
8803 	radeon_uvd_suspend(rdev);
8804 	radeon_vce_suspend(rdev);
8805 	cik_fini_pg(rdev);
8806 	cik_fini_cg(rdev);
8807 	cik_irq_suspend(rdev);
8808 	radeon_wb_disable(rdev);
8809 	cik_pcie_gart_disable(rdev);
8810 	return 0;
8811 }
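
/*
 * The teardown order above is roughly the reverse of the bring-up in
 * cik_startup(): the engines (CP, SDMA, UVD, VCE) are halted before
 * interrupts, writeback and the GART mapping are torn down.
 */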
8812 
8813 /* The plan is to move initialization into this function and use
8814  * helper functions so that radeon_device_init does pretty much
8815  * nothing more than call asic-specific functions. This should
8816  * also allow us to remove a bunch of callback functions, like
8817  * vram_info.
8818  */
8819 /**
8820  * cik_init - asic specific driver and hw init
8821  *
8822  * @rdev: radeon_device pointer
8823  *
8824  * Setup asic specific driver variables and program the hw
8825  * to a functional state (CIK).
8826  * Called at driver startup.
8827  * Returns 0 for success, errors for failure.
8828  */
8829 int cik_init(struct radeon_device *rdev)
8830 {
8831 	struct radeon_ring *ring;
8832 	int r;
8833 
8834 	/* Read BIOS */
8835 	if (!radeon_get_bios(rdev)) {
8836 		if (ASIC_IS_AVIVO(rdev))
8837 			return -EINVAL;
8838 	}
8839 	/* Must be an ATOMBIOS */
8840 	if (!rdev->is_atom_bios) {
8841 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8842 		return -EINVAL;
8843 	}
8844 	r = radeon_atombios_init(rdev);
8845 	if (r)
8846 		return r;
8847 
8848 	/* Post card if necessary */
8849 	if (!radeon_card_posted(rdev)) {
8850 		if (!rdev->bios) {
8851 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8852 			return -EINVAL;
8853 		}
8854 		DRM_INFO("GPU not posted. posting now...\n");
8855 		atom_asic_init(rdev->mode_info.atom_context);
8856 	}
8857 	/* init golden registers */
8858 	cik_init_golden_registers(rdev);
8859 	/* Initialize scratch registers */
8860 	cik_scratch_init(rdev);
8861 	/* Initialize surface registers */
8862 	radeon_surface_init(rdev);
8863 	/* Initialize clocks */
8864 	radeon_get_clock_info(rdev->ddev);
8865 
8866 	/* Fence driver */
8867 	r = radeon_fence_driver_init(rdev);
8868 	if (r)
8869 		return r;
8870 
8871 	/* initialize memory controller */
8872 	r = cik_mc_init(rdev);
8873 	if (r)
8874 		return r;
8875 	/* Memory manager */
8876 	r = radeon_bo_init(rdev);
8877 	if (r)
8878 		return r;
8879 
8880 	if (rdev->flags & RADEON_IS_IGP) {
8881 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8882 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8883 			r = cik_init_microcode(rdev);
8884 			if (r) {
8885 				DRM_ERROR("Failed to load firmware!\n");
8886 				return r;
8887 			}
8888 		}
8889 	} else {
8890 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8891 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8892 		    !rdev->mc_fw) {
8893 			r = cik_init_microcode(rdev);
8894 			if (r) {
8895 				DRM_ERROR("Failed to load firmware!\n");
8896 				return r;
8897 			}
8898 		}
8899 	}
8900 
8901 	/* Initialize power management */
8902 	radeon_pm_init(rdev);
8903 
8904 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8905 	ring->ring_obj = NULL;
8906 	r600_ring_init(rdev, ring, 1024 * 1024);
8907 
8908 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8909 	ring->ring_obj = NULL;
8910 	r600_ring_init(rdev, ring, 1024 * 1024);
8911 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8912 	if (r)
8913 		return r;
8914 
8915 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8916 	ring->ring_obj = NULL;
8917 	r600_ring_init(rdev, ring, 1024 * 1024);
8918 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8919 	if (r)
8920 		return r;
8921 
8922 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8923 	ring->ring_obj = NULL;
8924 	r600_ring_init(rdev, ring, 256 * 1024);
8925 
8926 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8927 	ring->ring_obj = NULL;
8928 	r600_ring_init(rdev, ring, 256 * 1024);
8929 
8930 	r = radeon_uvd_init(rdev);
8931 	if (!r) {
8932 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8933 		ring->ring_obj = NULL;
8934 		r600_ring_init(rdev, ring, 4096);
8935 	}
8936 
8937 	r = radeon_vce_init(rdev);
8938 	if (!r) {
8939 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8940 		ring->ring_obj = NULL;
8941 		r600_ring_init(rdev, ring, 4096);
8942 
8943 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8944 		ring->ring_obj = NULL;
8945 		r600_ring_init(rdev, ring, 4096);
8946 	}
8947 
8948 	rdev->ih.ring_obj = NULL;
8949 	r600_ih_ring_init(rdev, 64 * 1024);
8950 
8951 	r = r600_pcie_gart_init(rdev);
8952 	if (r)
8953 		return r;
8954 
8955 	rdev->accel_working = true;
8956 	r = cik_startup(rdev);
8957 	if (r) {
8958 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8959 		cik_cp_fini(rdev);
8960 		cik_sdma_fini(rdev);
8961 		cik_irq_fini(rdev);
8962 		sumo_rlc_fini(rdev);
8963 		cik_mec_fini(rdev);
8964 		radeon_wb_fini(rdev);
8965 		radeon_ib_pool_fini(rdev);
8966 		radeon_vm_manager_fini(rdev);
8967 		radeon_irq_kms_fini(rdev);
8968 		cik_pcie_gart_fini(rdev);
8969 		rdev->accel_working = false;
8970 	}
8971 
8972 	/* Don't start up if the MC ucode is missing.
8973 	 * The default clocks and voltages before the MC ucode
8974 	 * is loaded are not sufficient for advanced operations.
8975 	 */
8976 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8977 		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8978 		return -EINVAL;
8979 	}
8980 
8981 	return 0;
8982 }
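
/*
 * A minimal sketch of the asic lifecycle these callbacks implement,
 * assuming the usual radeon core flow:
 *
 *   driver load:   cik_init()    -> cik_startup()
 *   suspend:       cik_suspend()
 *   resume:        cik_resume()  -> cik_startup()
 *   driver unload: cik_fini() (below)
 */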
8983 
8984 /**
8985  * cik_fini - asic specific driver and hw fini
8986  *
8987  * @rdev: radeon_device pointer
8988  *
8989  * Tear down the asic specific driver variables and program the hw
8990  * to an idle state (CIK).
8991  * Called at driver unload.
8992  */
8993 void cik_fini(struct radeon_device *rdev)
8994 {
8995 	radeon_pm_fini(rdev);
8996 	cik_cp_fini(rdev);
8997 	cik_sdma_fini(rdev);
8998 	cik_fini_pg(rdev);
8999 	cik_fini_cg(rdev);
9000 	cik_irq_fini(rdev);
9001 	sumo_rlc_fini(rdev);
9002 	cik_mec_fini(rdev);
9003 	radeon_wb_fini(rdev);
9004 	radeon_vm_manager_fini(rdev);
9005 	radeon_ib_pool_fini(rdev);
9006 	radeon_irq_kms_fini(rdev);
9007 	uvd_v1_0_fini(rdev);
9008 	radeon_uvd_fini(rdev);
9009 	radeon_vce_fini(rdev);
9010 	cik_pcie_gart_fini(rdev);
9011 	r600_vram_scratch_fini(rdev);
9012 	radeon_gem_fini(rdev);
9013 	radeon_fence_driver_fini(rdev);
9014 	radeon_bo_fini(rdev);
9015 	radeon_atombios_fini(rdev);
9016 	kfree(rdev->bios);
9017 	rdev->bios = NULL;
9018 }
9019 
9020 void dce8_program_fmt(struct drm_encoder *encoder)
9021 {
9022 	struct drm_device *dev = encoder->dev;
9023 	struct radeon_device *rdev = dev->dev_private;
9024 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
9025 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
9026 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
9027 	int bpc = 0;
9028 	u32 tmp = 0;
9029 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
9030 
9031 	if (connector) {
9032 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9033 		bpc = radeon_get_monitor_bpc(connector);
9034 		dither = radeon_connector->dither;
9035 	}
9036 
9037 	/* LVDS/eDP FMT is set up by atom */
9038 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9039 		return;
9040 
9041 	/* not needed for analog */
9042 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9043 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9044 		return;
9045 
9046 	if (bpc == 0)
9047 		return;
9048 
9049 	switch (bpc) {
9050 	case 6:
9051 		if (dither == RADEON_FMT_DITHER_ENABLE)
9052 			/* XXX sort out optimal dither settings */
9053 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9054 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9055 		else
9056 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9057 		break;
9058 	case 8:
9059 		if (dither == RADEON_FMT_DITHER_ENABLE)
9060 			/* XXX sort out optimal dither settings */
9061 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9062 				FMT_RGB_RANDOM_ENABLE |
9063 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9064 		else
9065 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9066 		break;
9067 	case 10:
9068 		if (dither == RADEON_FMT_DITHER_ENABLE)
9069 			/* XXX sort out optimal dither settings */
9070 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9071 				FMT_RGB_RANDOM_ENABLE |
9072 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9073 		else
9074 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9075 		break;
9076 	default:
9077 		/* not needed */
9078 		break;
9079 	}
9080 
9081 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9082 }
9083 
9084 /* display watermark setup */
9085 /**
9086  * dce8_line_buffer_adjust - Set up the line buffer
9087  *
9088  * @rdev: radeon_device pointer
9089  * @radeon_crtc: the selected display controller
9090  * @mode: the current display mode on the selected display
9091  * controller
9092  *
9093  * Set up the line buffer allocation for
9094  * the selected display controller (CIK).
9095  * Returns the line buffer size in pixels.
9096  */
9097 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9098 				   struct radeon_crtc *radeon_crtc,
9099 				   struct drm_display_mode *mode)
9100 {
9101 	u32 tmp, buffer_alloc, i;
9102 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9103 	/*
9104 	 * Line Buffer Setup
9105 	 * There are 6 line buffers, one for each display controller.
9106 	 * There are 3 partitions per LB. Select the number of partitions
9107 	 * to enable based on the display width.  For display widths larger
9108 	 * than 4096, you need to use 2 display controllers and combine
9109 	 * them using the stereo blender.
9110 	 */
9111 	if (radeon_crtc->base.enabled && mode) {
9112 		if (mode->crtc_hdisplay < 1920) {
9113 			tmp = 1;
9114 			buffer_alloc = 2;
9115 		} else if (mode->crtc_hdisplay < 2560) {
9116 			tmp = 2;
9117 			buffer_alloc = 2;
9118 		} else if (mode->crtc_hdisplay < 4096) {
9119 			tmp = 0;
9120 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9121 		} else {
9122 			DRM_DEBUG_KMS("Mode too big for LB!\n");
9123 			tmp = 0;
9124 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9125 		}
9126 	} else {
9127 		tmp = 1;
9128 		buffer_alloc = 0;
9129 	}
9130 
9131 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9132 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9133 
9134 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9135 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9136 	for (i = 0; i < rdev->usec_timeout; i++) {
9137 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9138 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9139 			break;
9140 		udelay(1);
9141 	}
9142 
9143 	if (radeon_crtc->base.enabled && mode) {
9144 		switch (tmp) {
9145 		case 0:
9146 		default:
9147 			return 4096 * 2;
9148 		case 1:
9149 			return 1920 * 2;
9150 		case 2:
9151 			return 2560 * 2;
9152 		}
9153 	}
9154 
9155 	/* controller not enabled, so no lb used */
9156 	return 0;
9157 }
9158 
9159 /**
9160  * cik_get_number_of_dram_channels - get the number of dram channels
9161  *
9162  * @rdev: radeon_device pointer
9163  *
9164  * Look up the number of video ram channels (CIK).
9165  * Used for display watermark bandwidth calculations
9166  * Returns the number of dram channels
9167  */
9168 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9169 {
9170 	u32 tmp = RREG32(MC_SHARED_CHMAP);
9171 
9172 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9173 	case 0:
9174 	default:
9175 		return 1;
9176 	case 1:
9177 		return 2;
9178 	case 2:
9179 		return 4;
9180 	case 3:
9181 		return 8;
9182 	case 4:
9183 		return 3;
9184 	case 5:
9185 		return 6;
9186 	case 6:
9187 		return 10;
9188 	case 7:
9189 		return 12;
9190 	case 8:
9191 		return 16;
9192 	}
9193 }
9194 
9195 struct dce8_wm_params {
9196 	u32 dram_channels; /* number of dram channels */
9197 	u32 yclk;          /* bandwidth per dram data pin in kHz */
9198 	u32 sclk;          /* engine clock in kHz */
9199 	u32 disp_clk;      /* display clock in kHz */
9200 	u32 src_width;     /* viewport width */
9201 	u32 active_time;   /* active display time in ns */
9202 	u32 blank_time;    /* blank time in ns */
9203 	bool interlaced;   /* mode is interlaced */
9204 	fixed20_12 vsc;    /* vertical scale ratio */
9205 	u32 num_heads;     /* number of active crtcs */
9206 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9207 	u32 lb_size;       /* line buffer allocated to pipe */
9208 	u32 vtaps;         /* vertical scaler taps */
9209 };
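
/*
 * Hypothetical helper, shown only to illustrate the 20.12 fixed point
 * (fixed20_12) pattern the dce8_*_bandwidth() functions below follow
 * when converting a clock in kHz to MHz:
 */
static inline fixed20_12 dce8_khz_to_mhz(u32 clk_khz)
{
	fixed20_12 clk, a;

	a.full = dfixed_const(1000);
	clk.full = dfixed_const(clk_khz);
	clk.full = dfixed_div(clk, a);
	return clk;
}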
9210 
9211 /**
9212  * dce8_dram_bandwidth - get the dram bandwidth
9213  *
9214  * @wm: watermark calculation data
9215  *
9216  * Calculate the raw dram bandwidth (CIK).
9217  * Used for display watermark bandwidth calculations
9218  * Returns the dram bandwidth in MBytes/s
9219  */
9220 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9221 {
9222 	/* Calculate raw DRAM Bandwidth */
9223 	fixed20_12 dram_efficiency; /* 0.7 */
9224 	fixed20_12 yclk, dram_channels, bandwidth;
9225 	fixed20_12 a;
9226 
9227 	a.full = dfixed_const(1000);
9228 	yclk.full = dfixed_const(wm->yclk);
9229 	yclk.full = dfixed_div(yclk, a);
9230 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9231 	a.full = dfixed_const(10);
9232 	dram_efficiency.full = dfixed_const(7);
9233 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9234 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9235 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9236 
9237 	return dfixed_trunc(bandwidth);
9238 }
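
/*
 * Worked example for dce8_dram_bandwidth(): with yclk = 1000000 kHz
 * (1000 MHz) and 2 dram channels, the raw bandwidth comes out to
 * 1000 * (2 * 4) * 0.7 = 5600 MBytes/s.
 */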
9239 
9240 /**
9241  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9242  *
9243  * @wm: watermark calculation data
9244  *
9245  * Calculate the dram bandwidth used for display (CIK).
9246  * Used for display watermark bandwidth calculations
9247  * Returns the dram bandwidth for display in MBytes/s
9248  */
9249 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9250 {
9251 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9252 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9253 	fixed20_12 yclk, dram_channels, bandwidth;
9254 	fixed20_12 a;
9255 
9256 	a.full = dfixed_const(1000);
9257 	yclk.full = dfixed_const(wm->yclk);
9258 	yclk.full = dfixed_div(yclk, a);
9259 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9260 	a.full = dfixed_const(10);
9261 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9262 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9263 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9264 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9265 
9266 	return dfixed_trunc(bandwidth);
9267 }
9268 
9269 /**
9270  * dce8_data_return_bandwidth - get the data return bandwidth
9271  *
9272  * @wm: watermark calculation data
9273  *
9274  * Calculate the data return bandwidth used for display (CIK).
9275  * Used for display watermark bandwidth calculations
9276  * Returns the data return bandwidth in MBytes/s
9277  */
9278 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9279 {
9280 	/* Calculate the display Data return Bandwidth */
9281 	fixed20_12 return_efficiency; /* 0.8 */
9282 	fixed20_12 sclk, bandwidth;
9283 	fixed20_12 a;
9284 
9285 	a.full = dfixed_const(1000);
9286 	sclk.full = dfixed_const(wm->sclk);
9287 	sclk.full = dfixed_div(sclk, a);
9288 	a.full = dfixed_const(10);
9289 	return_efficiency.full = dfixed_const(8);
9290 	return_efficiency.full = dfixed_div(return_efficiency, a);
9291 	a.full = dfixed_const(32);
9292 	bandwidth.full = dfixed_mul(a, sclk);
9293 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9294 
9295 	return dfixed_trunc(bandwidth);
9296 }
9297 
9298 /**
9299  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9300  *
9301  * @wm: watermark calculation data
9302  *
9303  * Calculate the dmif bandwidth used for display (CIK).
9304  * Used for display watermark bandwidth calculations
9305  * Returns the dmif bandwidth in MBytes/s
9306  */
9307 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9308 {
9309 	/* Calculate the DMIF Request Bandwidth */
9310 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9311 	fixed20_12 disp_clk, bandwidth;
9312 	fixed20_12 a, b;
9313 
9314 	a.full = dfixed_const(1000);
9315 	disp_clk.full = dfixed_const(wm->disp_clk);
9316 	disp_clk.full = dfixed_div(disp_clk, a);
9317 	a.full = dfixed_const(32);
9318 	b.full = dfixed_mul(a, disp_clk);
9319 
9320 	a.full = dfixed_const(10);
9321 	disp_clk_request_efficiency.full = dfixed_const(8);
9322 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9323 
9324 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9325 
9326 	return dfixed_trunc(bandwidth);
9327 }
9328 
9329 /**
9330  * dce8_available_bandwidth - get the min available bandwidth
9331  *
9332  * @wm: watermark calculation data
9333  *
9334  * Calculate the min available bandwidth used for display (CIK).
9335  * Used for display watermark bandwidth calculations
9336  * Returns the min available bandwidth in MBytes/s
9337  */
9338 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9339 {
9340 	/* Calculate the available bandwidth. The display can use this temporarily, but not on average. */
9341 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9342 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9343 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9344 
9345 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9346 }
9347 
9348 /**
9349  * dce8_average_bandwidth - get the average available bandwidth
9350  *
9351  * @wm: watermark calculation data
9352  *
9353  * Calculate the average available bandwidth used for display (CIK).
9354  * Used for display watermark bandwidth calculations
9355  * Returns the average available bandwidth in MBytes/s
9356  */
9357 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9358 {
9359 	/* Calculate the display mode Average Bandwidth
9360 	 * DisplayMode should contain the source and destination dimensions,
9361 	 * timing, etc.
9362 	 */
9363 	fixed20_12 bpp;
9364 	fixed20_12 line_time;
9365 	fixed20_12 src_width;
9366 	fixed20_12 bandwidth;
9367 	fixed20_12 a;
9368 
9369 	a.full = dfixed_const(1000);
9370 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9371 	line_time.full = dfixed_div(line_time, a);
9372 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9373 	src_width.full = dfixed_const(wm->src_width);
9374 	bandwidth.full = dfixed_mul(src_width, bpp);
9375 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9376 	bandwidth.full = dfixed_div(bandwidth, line_time);
9377 
9378 	return dfixed_trunc(bandwidth);
9379 }
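
/*
 * Worked example for dce8_average_bandwidth(): a 1920 pixel wide
 * source at 4 bytes/pixel with vsc = 1.0 and a ~14815 ns line time
 * (roughly 1080p60) averages 1920 * 4 / 14.815 ~= 518 MBytes/s.
 */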
9380 
9381 /**
9382  * dce8_latency_watermark - get the latency watermark
9383  *
9384  * @wm: watermark calculation data
9385  *
9386  * Calculate the latency watermark (CIK).
9387  * Used for display watermark bandwidth calculations
9388  * Returns the latency watermark in ns
9389  */
9390 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9391 {
9392 	/* First calculate the latency in ns */
9393 	u32 mc_latency = 2000; /* 2000 ns. */
9394 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9395 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9396 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9397 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9398 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9399 		(wm->num_heads * cursor_line_pair_return_time);
9400 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9401 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9402 	u32 tmp, dmif_size = 12288;
9403 	fixed20_12 a, b, c;
9404 
9405 	if (wm->num_heads == 0)
9406 		return 0;
9407 
9408 	a.full = dfixed_const(2);
9409 	b.full = dfixed_const(1);
9410 	if ((wm->vsc.full > a.full) ||
9411 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9412 	    (wm->vtaps >= 5) ||
9413 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9414 		max_src_lines_per_dst_line = 4;
9415 	else
9416 		max_src_lines_per_dst_line = 2;
9417 
9418 	a.full = dfixed_const(available_bandwidth);
9419 	b.full = dfixed_const(wm->num_heads);
9420 	a.full = dfixed_div(a, b);
9421 
9422 	b.full = dfixed_const(mc_latency + 512);
9423 	c.full = dfixed_const(wm->disp_clk);
9424 	b.full = dfixed_div(b, c);
9425 
9426 	c.full = dfixed_const(dmif_size);
9427 	b.full = dfixed_div(c, b);
9428 
9429 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9430 
9431 	b.full = dfixed_const(1000);
9432 	c.full = dfixed_const(wm->disp_clk);
9433 	b.full = dfixed_div(c, b);
9434 	c.full = dfixed_const(wm->bytes_per_pixel);
9435 	b.full = dfixed_mul(b, c);
9436 
9437 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9438 
9439 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9440 	b.full = dfixed_const(1000);
9441 	c.full = dfixed_const(lb_fill_bw);
9442 	b.full = dfixed_div(c, b);
9443 	a.full = dfixed_div(a, b);
9444 	line_fill_time = dfixed_trunc(a);
9445 
9446 	if (line_fill_time < wm->active_time)
9447 		return latency;
9448 	else
9449 		return latency + (line_fill_time - wm->active_time);
9450 
9451 }
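
/*
 * In short, the watermark above is the memory-side latency (mc_latency
 * plus the chunk and cursor return time of the other heads plus the dc
 * pipe latency), extended by however much the line buffer fill time
 * exceeds the active display time.
 */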
9452 
9453 /**
9454  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9455  * average and available dram bandwidth
9456  *
9457  * @wm: watermark calculation data
9458  *
9459  * Check if the display average bandwidth fits in the display
9460  * dram bandwidth (CIK).
9461  * Used for display watermark bandwidth calculations
9462  * Returns true if the display fits, false if not.
9463  */
9464 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9465 {
9466 	if (dce8_average_bandwidth(wm) <=
9467 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9468 		return true;
9469 	else
9470 		return false;
9471 }
9472 
9473 /**
9474  * dce8_average_bandwidth_vs_available_bandwidth - check
9475  * average and available bandwidth
9476  *
9477  * @wm: watermark calculation data
9478  *
9479  * Check if the display average bandwidth fits in the display
9480  * available bandwidth (CIK).
9481  * Used for display watermark bandwidth calculations
9482  * Returns true if the display fits, false if not.
9483  */
9484 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9485 {
9486 	if (dce8_average_bandwidth(wm) <=
9487 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9488 		return true;
9489 	else
9490 		return false;
9491 }
9492 
9493 /**
9494  * dce8_check_latency_hiding - check latency hiding
9495  *
9496  * @wm: watermark calculation data
9497  *
9498  * Check latency hiding (CIK).
9499  * Used for display watermark bandwidth calculations
9500  * Returns true if the display fits, false if not.
9501  */
9502 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9503 {
9504 	u32 lb_partitions = wm->lb_size / wm->src_width;
9505 	u32 line_time = wm->active_time + wm->blank_time;
9506 	u32 latency_tolerant_lines;
9507 	u32 latency_hiding;
9508 	fixed20_12 a;
9509 
9510 	a.full = dfixed_const(1);
9511 	if (wm->vsc.full > a.full)
9512 		latency_tolerant_lines = 1;
9513 	else {
9514 		if (lb_partitions <= (wm->vtaps + 1))
9515 			latency_tolerant_lines = 1;
9516 		else
9517 			latency_tolerant_lines = 2;
9518 	}
9519 
9520 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9521 
9522 	if (dce8_latency_watermark(wm) <= latency_hiding)
9523 		return true;
9524 	else
9525 		return false;
9526 }
9527 
9528 /**
9529  * dce8_program_watermarks - program display watermarks
9530  *
9531  * @rdev: radeon_device pointer
9532  * @radeon_crtc: the selected display controller
9533  * @lb_size: line buffer size
9534  * @num_heads: number of display controllers in use
9535  *
9536  * Calculate and program the display watermarks for the
9537  * selected display controller (CIK).
9538  */
9539 static void dce8_program_watermarks(struct radeon_device *rdev,
9540 				    struct radeon_crtc *radeon_crtc,
9541 				    u32 lb_size, u32 num_heads)
9542 {
9543 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9544 	struct dce8_wm_params wm_low, wm_high;
9545 	u32 pixel_period;
9546 	u32 line_time = 0;
9547 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9548 	u32 tmp, wm_mask;
9549 
9550 	if (radeon_crtc->base.enabled && num_heads && mode) {
9551 		pixel_period = 1000000 / (u32)mode->clock;
9552 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9553 
9554 		/* watermark for high clocks */
9555 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9556 		    rdev->pm.dpm_enabled) {
9557 			wm_high.yclk =
9558 				radeon_dpm_get_mclk(rdev, false) * 10;
9559 			wm_high.sclk =
9560 				radeon_dpm_get_sclk(rdev, false) * 10;
9561 		} else {
9562 			wm_high.yclk = rdev->pm.current_mclk * 10;
9563 			wm_high.sclk = rdev->pm.current_sclk * 10;
9564 		}
9565 
9566 		wm_high.disp_clk = mode->clock;
9567 		wm_high.src_width = mode->crtc_hdisplay;
9568 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9569 		wm_high.blank_time = line_time - wm_high.active_time;
9570 		wm_high.interlaced = false;
9571 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9572 			wm_high.interlaced = true;
9573 		wm_high.vsc = radeon_crtc->vsc;
9574 		wm_high.vtaps = 1;
9575 		if (radeon_crtc->rmx_type != RMX_OFF)
9576 			wm_high.vtaps = 2;
9577 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9578 		wm_high.lb_size = lb_size;
9579 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9580 		wm_high.num_heads = num_heads;
9581 
9582 		/* set for high clocks */
9583 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9584 
9585 		/* possibly force display priority to high */
9586 		/* should really do this at mode validation time... */
9587 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9588 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9589 		    !dce8_check_latency_hiding(&wm_high) ||
9590 		    (rdev->disp_priority == 2)) {
9591 			DRM_DEBUG_KMS("force priority to high\n");
9592 		}
9593 
9594 		/* watermark for low clocks */
9595 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9596 		    rdev->pm.dpm_enabled) {
9597 			wm_low.yclk =
9598 				radeon_dpm_get_mclk(rdev, true) * 10;
9599 			wm_low.sclk =
9600 				radeon_dpm_get_sclk(rdev, true) * 10;
9601 		} else {
9602 			wm_low.yclk = rdev->pm.current_mclk * 10;
9603 			wm_low.sclk = rdev->pm.current_sclk * 10;
9604 		}
9605 
9606 		wm_low.disp_clk = mode->clock;
9607 		wm_low.src_width = mode->crtc_hdisplay;
9608 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9609 		wm_low.blank_time = line_time - wm_low.active_time;
9610 		wm_low.interlaced = false;
9611 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9612 			wm_low.interlaced = true;
9613 		wm_low.vsc = radeon_crtc->vsc;
9614 		wm_low.vtaps = 1;
9615 		if (radeon_crtc->rmx_type != RMX_OFF)
9616 			wm_low.vtaps = 2;
9617 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9618 		wm_low.lb_size = lb_size;
9619 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9620 		wm_low.num_heads = num_heads;
9621 
9622 		/* set for low clocks */
9623 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9624 
9625 		/* possibly force display priority to high */
9626 		/* should really do this at mode validation time... */
9627 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9628 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9629 		    !dce8_check_latency_hiding(&wm_low) ||
9630 		    (rdev->disp_priority == 2)) {
9631 			DRM_DEBUG_KMS("force priority to high\n");
9632 		}
9633 	}
9634 
9635 	/* select wm A */
9636 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9637 	tmp = wm_mask;
9638 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9639 	tmp |= LATENCY_WATERMARK_MASK(1);
9640 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9641 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9642 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9643 		LATENCY_HIGH_WATERMARK(line_time)));
9644 	/* select wm B */
9645 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9646 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9647 	tmp |= LATENCY_WATERMARK_MASK(2);
9648 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9649 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9650 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9651 		LATENCY_HIGH_WATERMARK(line_time)));
9652 	/* restore original selection */
9653 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9654 
9655 	/* save values for DPM */
9656 	radeon_crtc->line_time = line_time;
9657 	radeon_crtc->wm_high = latency_watermark_a;
9658 	radeon_crtc->wm_low = latency_watermark_b;
9659 }
9660 
9661 /**
9662  * dce8_bandwidth_update - program display watermarks
9663  *
9664  * @rdev: radeon_device pointer
9665  *
9666  * Calculate and program the display watermarks and line
9667  * buffer allocation (CIK).
9668  */
9669 void dce8_bandwidth_update(struct radeon_device *rdev)
9670 {
9671 	struct drm_display_mode *mode = NULL;
9672 	u32 num_heads = 0, lb_size;
9673 	int i;
9674 
9675 	if (!rdev->mode_info.mode_config_initialized)
9676 		return;
9677 
9678 	radeon_update_display_priority(rdev);
9679 
9680 	for (i = 0; i < rdev->num_crtc; i++) {
9681 		if (rdev->mode_info.crtcs[i]->base.enabled)
9682 			num_heads++;
9683 	}
9684 	for (i = 0; i < rdev->num_crtc; i++) {
9685 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9686 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9687 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9688 	}
9689 }
9690 
9691 /**
9692  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9693  *
9694  * @rdev: radeon_device pointer
9695  *
9696  * Fetches a GPU clock counter snapshot (CIK).
9697  * Returns the 64 bit clock counter snapshot.
9698  */
9699 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9700 {
9701 	uint64_t clock;
9702 
9703 	mutex_lock(&rdev->gpu_clock_mutex);
9704 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9705 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9706 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9707 	mutex_unlock(&rdev->gpu_clock_mutex);
9708 	return clock;
9709 }
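
/*
 * Illustrative sketch, not part of the driver: with two snapshots the
 * 64 bit counter can time a section of code in GPU clocks (wraparound
 * is not a practical concern at 64 bits).
 */
static inline uint64_t cik_gpu_clocks_elapsed(struct radeon_device *rdev,
					      uint64_t start)
{
	return cik_get_gpu_clock_counter(rdev) - start;
}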
9710 
9711 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9712                               u32 cntl_reg, u32 status_reg)
9713 {
9714 	int r, i;
9715 	struct atom_clock_dividers dividers;
9716 	uint32_t tmp;
9717 
9718 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9719 					   clock, false, &dividers);
9720 	if (r)
9721 		return r;
9722 
9723 	tmp = RREG32_SMC(cntl_reg);
9724 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9725 	tmp |= dividers.post_divider;
9726 	WREG32_SMC(cntl_reg, tmp);
9727 
9728 	for (i = 0; i < 100; i++) {
9729 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9730 			break;
9731 		mdelay(10);
9732 	}
9733 	if (i == 100)
9734 		return -ETIMEDOUT;
9735 
9736 	return 0;
9737 }
9738 
9739 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9740 {
9741 	int r = 0;
9742 
9743 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9744 	if (r)
9745 		return r;
9746 
9747 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9748 	return r;
9749 }
9750 
9751 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9752 {
9753 	int r, i;
9754 	struct atom_clock_dividers dividers;
9755 	u32 tmp;
9756 
9757 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9758 					   ecclk, false, &dividers);
9759 	if (r)
9760 		return r;
9761 
9762 	for (i = 0; i < 100; i++) {
9763 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9764 			break;
9765 		mdelay(10);
9766 	}
9767 	if (i == 100)
9768 		return -ETIMEDOUT;
9769 
9770 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9771 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9772 	tmp |= dividers.post_divider;
9773 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9774 
9775 	for (i = 0; i < 100; i++) {
9776 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9777 			break;
9778 		mdelay(10);
9779 	}
9780 	if (i == 100)
9781 		return -ETIMEDOUT;
9782 
9783 	return 0;
9784 }
9785 
9786 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9787 {
9788 	struct pci_dev *root = rdev->pdev->bus->self;
9789 	int bridge_pos, gpu_pos;
9790 	u32 speed_cntl, mask, current_data_rate;
9791 	int ret, i;
9792 	u16 tmp16;
9793 
9794 	if (pci_is_root_bus(rdev->pdev->bus))
9795 		return;
9796 
9797 	if (radeon_pcie_gen2 == 0)
9798 		return;
9799 
9800 	if (rdev->flags & RADEON_IS_IGP)
9801 		return;
9802 
9803 	if (!(rdev->flags & RADEON_IS_PCIE))
9804 		return;
9805 
9806 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9807 	if (ret != 0)
9808 		return;
9809 
9810 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9811 		return;
9812 
9813 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9814 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9815 		LC_CURRENT_DATA_RATE_SHIFT;
9816 	if (mask & DRM_PCIE_SPEED_80) {
9817 		if (current_data_rate == 2) {
9818 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9819 			return;
9820 		}
9821 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9822 	} else if (mask & DRM_PCIE_SPEED_50) {
9823 		if (current_data_rate == 1) {
9824 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9825 			return;
9826 		}
9827 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9828 	}
9829 
9830 	bridge_pos = pci_pcie_cap(root);
9831 	if (!bridge_pos)
9832 		return;
9833 
9834 	gpu_pos = pci_pcie_cap(rdev->pdev);
9835 	if (!gpu_pos)
9836 		return;
9837 
9838 	if (mask & DRM_PCIE_SPEED_80) {
9839 		/* re-try equalization if gen3 is not already enabled */
9840 		if (current_data_rate != 2) {
9841 			u16 bridge_cfg, gpu_cfg;
9842 			u16 bridge_cfg2, gpu_cfg2;
9843 			u32 max_lw, current_lw, tmp;
9844 
9845 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9846 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9847 
9848 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9849 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9850 
9851 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9852 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9853 
9854 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9855 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9856 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9857 
9858 			if (current_lw < max_lw) {
9859 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9860 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9861 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9862 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9863 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9864 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9865 				}
9866 			}
9867 
9868 			for (i = 0; i < 10; i++) {
9869 				/* check status */
9870 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9871 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9872 					break;
9873 
9874 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9875 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9876 
9877 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9878 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9879 
9880 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9881 				tmp |= LC_SET_QUIESCE;
9882 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9883 
9884 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9885 				tmp |= LC_REDO_EQ;
9886 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9887 
9888 				mdelay(100);
9889 
9890 				/* linkctl */
9891 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9892 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9893 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9894 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9895 
9896 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9897 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9898 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9899 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9900 
9901 				/* linkctl2 */
9902 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9903 				tmp16 &= ~((1 << 4) | (7 << 9));
9904 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9905 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9906 
9907 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9908 				tmp16 &= ~((1 << 4) | (7 << 9));
9909 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9910 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9911 
9912 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9913 				tmp &= ~LC_SET_QUIESCE;
9914 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9915 			}
9916 		}
9917 	}
9918 
9919 	/* set the link speed */
9920 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9921 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9922 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9923 
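	/*
	 * Program the target link speed (PCI_EXP_LNKCTL2 bits [3:0]);
	 * per the PCIe spec the encodings are 1 = 2.5GT/s, 2 = 5.0GT/s,
	 * 3 = 8.0GT/s.
	 */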
9924 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9925 	tmp16 &= ~0xf;
9926 	if (mask & DRM_PCIE_SPEED_80)
9927 		tmp16 |= 3; /* gen3 */
9928 	else if (mask & DRM_PCIE_SPEED_50)
9929 		tmp16 |= 2; /* gen2 */
9930 	else
9931 		tmp16 |= 1; /* gen1 */
9932 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9933 
9934 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9935 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9936 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9937 
9938 	for (i = 0; i < rdev->usec_timeout; i++) {
9939 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9940 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9941 			break;
9942 		udelay(1);
9943 	}
9944 }
9945 
9946 static void cik_program_aspm(struct radeon_device *rdev)
9947 {
9948 	u32 data, orig;
9949 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9950 	bool disable_clkreq = false;
9951 
9952 	if (radeon_aspm == 0)
9953 		return;
9954 
9955 	/* XXX double check IGPs */
9956 	if (rdev->flags & RADEON_IS_IGP)
9957 		return;
9958 
9959 	if (!(rdev->flags & RADEON_IS_PCIE))
9960 		return;
9961 
9962 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9963 	data &= ~LC_XMIT_N_FTS_MASK;
9964 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9965 	if (orig != data)
9966 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9967 
9968 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9969 	data |= LC_GO_TO_RECOVERY;
9970 	if (orig != data)
9971 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9972 
9973 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9974 	data |= P_IGNORE_EDB_ERR;
9975 	if (orig != data)
9976 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9977 
9978 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9979 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9980 	data |= LC_PMI_TO_L1_DIS;
9981 	if (!disable_l0s)
9982 		data |= LC_L0S_INACTIVITY(7);
9983 
9984 	if (!disable_l1) {
9985 		data |= LC_L1_INACTIVITY(7);
9986 		data &= ~LC_PMI_TO_L1_DIS;
9987 		if (orig != data)
9988 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9989 
9990 		if (!disable_plloff_in_l1) {
9991 			bool clk_req_support;
9992 
9993 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9994 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9995 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9996 			if (orig != data)
9997 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9998 
9999 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
10000 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
10001 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
10002 			if (orig != data)
10003 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
10004 
10005 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
10006 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
10007 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
10008 			if (orig != data)
10009 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
10010 
10011 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
10012 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
10013 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
10014 			if (orig != data)
10015 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
10016 
10017 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
10018 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
10019 			data |= LC_DYN_LANES_PWR_STATE(3);
10020 			if (orig != data)
10021 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
10022 
10023 			if (!disable_clkreq &&
10024 			    !pci_is_root_bus(rdev->pdev->bus)) {
10025 				struct pci_dev *root = rdev->pdev->bus->self;
10026 				u32 lnkcap;
10027 
10028 				clk_req_support = false;
10029 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
10030 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
10031 					clk_req_support = true;
10032 			} else {
10033 				clk_req_support = false;
10034 			}
10035 
10036 			if (clk_req_support) {
10037 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
10038 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
10039 				if (orig != data)
10040 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
10041 
10042 				orig = data = RREG32_SMC(THM_CLK_CNTL);
10043 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
10044 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
10045 				if (orig != data)
10046 					WREG32_SMC(THM_CLK_CNTL, data);
10047 
10048 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
10049 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
10050 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
10051 				if (orig != data)
10052 					WREG32_SMC(MISC_CLK_CTRL, data);
10053 
10054 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
10055 				data &= ~BCLK_AS_XCLK;
10056 				if (orig != data)
10057 					WREG32_SMC(CG_CLKPIN_CNTL, data);
10058 
10059 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
10060 				data &= ~FORCE_BIF_REFCLK_EN;
10061 				if (orig != data)
10062 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
10063 
10064 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
10065 				data &= ~MPLL_CLKOUT_SEL_MASK;
10066 				data |= MPLL_CLKOUT_SEL(4);
10067 				if (orig != data)
10068 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
10069 			}
10070 		}
10071 	} else {
10072 		if (orig != data)
10073 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10074 	}
10075 
10076 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
10077 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
10078 	if (orig != data)
10079 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
10080 
10081 	if (!disable_l0s) {
10082 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
10083 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
10084 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
10085 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
10086 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
10087 				data &= ~LC_L0S_INACTIVITY_MASK;
10088 				if (orig != data)
10089 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10090 			}
10091 		}
10092 	}
10093 }
10094