/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

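/*
 * Two firmware naming schemes follow: the legacy all-caps images (raw,
 * big-endian blobs) and the newer lowercase images (little-endian ucode
 * with descriptive headers, shared with amdgpu).  Which format was found
 * at init time is recorded in rdev->new_fw and checked wherever ucode is
 * consumed in this file.
 */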
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

/*
 * Indirect register accessors for the DIDT block.  Accesses go through
 * an index/data register pair, so each access is serialized with
 * didt_idx_lock to keep a concurrent access from clobbering the index.
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, reg);
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, reg);
	WREG32(CIK_DIDT_IND_DATA, v);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

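	/* CTF_TEMP appears to be a 9-bit reading; bit 9 of the extracted
	 * value seems to flag an out-of-range sample, so clamp to 255C.
	 */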
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

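	/* Bare SMC offset with no symbolic define here; the raw value
	 * appears to count in 1/8 degree C steps with a 49 degree bias
	 * (see the conversion below).
	 */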
	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors for the PCIE port block.  As with the DIDT
 * pair above, the index/data sequence is protected by a spinlock; the
 * extra reads after each write post the write before the lock is dropped.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

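/*
 * RLC save/restore register lists.  These tables appear to be consumed
 * by the RLC rather than walked by driver code: each entry packs an
 * instance/broadcast select into the upper 16 bits and a dword register
 * offset into the lower 16 ((sel << 16) | (reg >> 2)), normally followed
 * by a default value of 0.  The bare counts (0x3, 0x5) seem to introduce
 * runs with different handling; the exact semantics are ucode-defined.
 */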
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

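/* Same packed encoding as the spectre list above, with fewer
 * per-instance entries, presumably matching Kalindi's smaller
 * compute configuration.
 */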
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

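/*
 * "Golden" register tables are {offset, and_mask, or_mask} triples
 * consumed by radeon_program_register_sequence(): an and_mask of
 * 0xffffffff writes or_mask directly, anything else does a
 * read-modify-write of the masked bits.
 */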
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

1767 #define BONAIRE_IO_MC_REGS_SIZE 36
1768 
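/* {MC_SEQ_IO_DEBUG index, value} pairs streamed into the MC sequencer
 * by ci_mc_load_microcode() when the legacy (headerless) MC firmware
 * images are in use; the new images carry this table themselves
 */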
1769 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1770 {
1771 	{0x00000070, 0x04400000},
1772 	{0x00000071, 0x80c01803},
1773 	{0x00000072, 0x00004004},
1774 	{0x00000073, 0x00000100},
1775 	{0x00000074, 0x00ff0000},
1776 	{0x00000075, 0x34000000},
1777 	{0x00000076, 0x08000014},
1778 	{0x00000077, 0x00cc08ec},
1779 	{0x00000078, 0x00000400},
1780 	{0x00000079, 0x00000000},
1781 	{0x0000007a, 0x04090000},
1782 	{0x0000007c, 0x00000000},
1783 	{0x0000007e, 0x4408a8e8},
1784 	{0x0000007f, 0x00000304},
1785 	{0x00000080, 0x00000000},
1786 	{0x00000082, 0x00000001},
1787 	{0x00000083, 0x00000002},
1788 	{0x00000084, 0xf3e4f400},
1789 	{0x00000085, 0x052024e3},
1790 	{0x00000087, 0x00000000},
1791 	{0x00000088, 0x01000000},
1792 	{0x0000008a, 0x1c0a0000},
1793 	{0x0000008b, 0xff010000},
1794 	{0x0000008d, 0xffffefff},
1795 	{0x0000008e, 0xfff3efff},
1796 	{0x0000008f, 0xfff3efbf},
1797 	{0x00000092, 0xf7ffffff},
1798 	{0x00000093, 0xffffff7f},
1799 	{0x00000095, 0x00101101},
1800 	{0x00000096, 0x00000fff},
1801 	{0x00000097, 0x00116fff},
1802 	{0x00000098, 0x60010000},
1803 	{0x00000099, 0x10010000},
1804 	{0x0000009a, 0x00006000},
1805 	{0x0000009b, 0x00001000},
1806 	{0x0000009f, 0x00b48000}
1807 };
1808 
1809 #define HAWAII_IO_MC_REGS_SIZE 22
1810 
1811 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1812 {
1813 	{0x0000007d, 0x40000000},
1814 	{0x0000007e, 0x40180304},
1815 	{0x0000007f, 0x0000ff00},
1816 	{0x00000081, 0x00000000},
1817 	{0x00000083, 0x00000800},
1818 	{0x00000086, 0x00000000},
1819 	{0x00000087, 0x00000100},
1820 	{0x00000088, 0x00020100},
1821 	{0x00000089, 0x00000000},
1822 	{0x0000008b, 0x00040000},
1823 	{0x0000008c, 0x00000100},
1824 	{0x0000008e, 0xff010000},
1825 	{0x00000090, 0xffffefff},
1826 	{0x00000091, 0xfff3efff},
1827 	{0x00000092, 0xfff3efbf},
1828 	{0x00000093, 0xf7ffffff},
1829 	{0x00000094, 0xffffff7f},
1830 	{0x00000095, 0x00000fff},
1831 	{0x00000096, 0x00116fff},
1832 	{0x00000097, 0x60010000},
1833 	{0x00000098, 0x10010000},
1834 	{0x0000009f, 0x00c79000}
1835 };
1836 
1837 
1838 /**
1839  * cik_srbm_select - select specific register instances
1840  *
1841  * @rdev: radeon_device pointer
1842  * @me: selected ME (micro engine)
1843  * @pipe: pipe
1844  * @queue: queue
1845  * @vmid: VMID
1846  *
1847  * Switches the currently active register instances.  Some
1848  * registers are instanced per VMID, others are instanced per
1849  * me/pipe/queue combination.
1850  */
1851 static void cik_srbm_select(struct radeon_device *rdev,
1852 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1853 {
1854 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1855 			     MEID(me & 0x3) |
1856 			     VMID(vmid & 0xf) |
1857 			     QUEUEID(queue & 0x7));
1858 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1859 }
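
/* callers bracket cik_srbm_select() with rdev->srbm_mutex: select an
 * me/pipe/queue/vmid instance, program the instanced registers, then
 * restore the default instance with cik_srbm_select(rdev, 0, 0, 0, 0)
 */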
1860 
1861 /* ucode loading */
1862 /**
1863  * ci_mc_load_microcode - load MC ucode into the hw
1864  *
1865  * @rdev: radeon_device pointer
1866  *
1867  * Load the GDDR MC ucode into the hw (CIK).
1868  * Returns 0 on success, error on failure.
1869  */
1870 int ci_mc_load_microcode(struct radeon_device *rdev)
1871 {
1872 	const __be32 *fw_data = NULL;
1873 	const __le32 *new_fw_data = NULL;
1874 	u32 running, tmp;
1875 	u32 *io_mc_regs = NULL;
1876 	const __le32 *new_io_mc_regs = NULL;
1877 	int i, regs_size, ucode_size;
1878 
1879 	if (!rdev->mc_fw)
1880 		return -EINVAL;
1881 
1882 	if (rdev->new_fw) {
1883 		const struct mc_firmware_header_v1_0 *hdr =
1884 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1885 
1886 		radeon_ucode_print_mc_hdr(&hdr->header);
1887 
1888 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1889 		new_io_mc_regs = (const __le32 *)
1890 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1891 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1892 		new_fw_data = (const __le32 *)
1893 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1894 	} else {
1895 		ucode_size = rdev->mc_fw->size / 4;
1896 
1897 		switch (rdev->family) {
1898 		case CHIP_BONAIRE:
1899 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1900 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1901 			break;
1902 		case CHIP_HAWAII:
1903 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1904 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1905 			break;
1906 		default:
1907 			return -EINVAL;
1908 		}
1909 		fw_data = (const __be32 *)rdev->mc_fw->data;
1910 	}
1911 
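	/* the ucode is only loaded while the MC engine is stopped, so no
	 * memory blackout window is required: stop the sequencer, stream
	 * in the IO debug settings and the ucode words, then restart it
	 * and wait for both memory channels to finish training
	 */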
1912 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1913 
1914 	if (running == 0) {
1919 
1920 		/* reset the engine and set to writable */
1921 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1922 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1923 
1924 		/* load mc io regs */
1925 		for (i = 0; i < regs_size; i++) {
1926 			if (rdev->new_fw) {
1927 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1928 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1929 			} else {
1930 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1931 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1932 			}
1933 		}
1934 
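		/* some Bonaire boards (device 0x6649) need two extra IO
		 * debug overrides depending on the MC_SEQ_MISC0
		 * configuration
		 */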
1935 		tmp = RREG32(MC_SEQ_MISC0);
1936 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1937 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1938 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1939 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1940 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1941 		}
1942 
1943 		/* load the MC ucode */
1944 		for (i = 0; i < ucode_size; i++) {
1945 			if (rdev->new_fw)
1946 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1947 			else
1948 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1949 		}
1950 
1951 		/* put the engine back into the active state */
1952 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1953 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1954 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1955 
1956 		/* wait for training to complete */
1957 		for (i = 0; i < rdev->usec_timeout; i++) {
1958 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1959 				break;
1960 			udelay(1);
1961 		}
1962 		for (i = 0; i < rdev->usec_timeout; i++) {
1963 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1964 				break;
1965 			udelay(1);
1966 		}
1970 	}
1971 
1972 	return 0;
1973 }
1974 
1975 /**
1976  * cik_init_microcode - load ucode images from disk
1977  *
1978  * @rdev: radeon_device pointer
1979  *
1980  * Use the firmware interface to load the ucode images into
1981  * the driver (not loaded into hw).
1982  * Returns 0 on success, error on failure.
1983  */
1984 static int cik_init_microcode(struct radeon_device *rdev)
1985 {
1986 	const char *chip_name;
1987 	const char *new_chip_name;
1988 	size_t pfp_req_size, me_req_size, ce_req_size,
1989 		mec_req_size, rlc_req_size, mc_req_size = 0,
1990 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1991 	char fw_name[30];
1992 	int new_fw = 0;
1993 	int err;
1994 	int num_fw;
1995 	bool new_smc = false;
1996 
1997 	DRM_DEBUG("\n");
1998 
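	/* each image is requested under its new lower-case name first
	 * (validated, header-based layout), falling back to the legacy
	 * upper-case name; new_fw counts validated images so mixing the
	 * two generations can be rejected once everything is loaded
	 */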
1999 	switch (rdev->family) {
2000 	case CHIP_BONAIRE:
2001 		chip_name = "BONAIRE";
2002 		if ((rdev->pdev->revision == 0x80) ||
2003 		    (rdev->pdev->revision == 0x81) ||
2004 		    (rdev->pdev->device == 0x665f))
2005 			new_smc = true;
2006 		new_chip_name = "bonaire";
2007 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2008 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2009 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2010 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2011 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2012 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2013 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2014 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2015 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2016 		num_fw = 8;
2017 		break;
2018 	case CHIP_HAWAII:
2019 		chip_name = "HAWAII";
2020 		if (rdev->pdev->revision == 0x80)
2021 			new_smc = true;
2022 		new_chip_name = "hawaii";
2023 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2024 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2025 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2026 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2027 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2028 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2029 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2030 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2031 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2032 		num_fw = 8;
2033 		break;
2034 	case CHIP_KAVERI:
2035 		chip_name = "KAVERI";
2036 		new_chip_name = "kaveri";
2037 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2038 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2039 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2040 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2041 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2042 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2043 		num_fw = 7;
2044 		break;
2045 	case CHIP_KABINI:
2046 		chip_name = "KABINI";
2047 		new_chip_name = "kabini";
2048 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2049 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2050 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2051 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2052 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2053 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2054 		num_fw = 6;
2055 		break;
2056 	case CHIP_MULLINS:
2057 		chip_name = "MULLINS";
2058 		new_chip_name = "mullins";
2059 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2060 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2061 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2062 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2063 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2064 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2065 		num_fw = 6;
2066 		break;
2067 	default:
		BUG();
2068 	}
2069 
2070 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2071 
2072 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2073 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2074 	if (err) {
2075 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2076 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2077 		if (err)
2078 			goto out;
2079 		if (rdev->pfp_fw->size != pfp_req_size) {
2080 			printk(KERN_ERR
2081 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2082 			       rdev->pfp_fw->size, fw_name);
2083 			err = -EINVAL;
2084 			goto out;
2085 		}
2086 	} else {
2087 		err = radeon_ucode_validate(rdev->pfp_fw);
2088 		if (err) {
2089 			printk(KERN_ERR
2090 			       "cik_fw: validation failed for firmware \"%s\"\n",
2091 			       fw_name);
2092 			goto out;
2093 		} else {
2094 			new_fw++;
2095 		}
2096 	}
2097 
2098 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2099 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2100 	if (err) {
2101 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2102 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2103 		if (err)
2104 			goto out;
2105 		if (rdev->me_fw->size != me_req_size) {
2106 			printk(KERN_ERR
2107 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2108 			       rdev->me_fw->size, fw_name);
2109 			err = -EINVAL;
			goto out;
2110 		}
2111 	} else {
2112 		err = radeon_ucode_validate(rdev->me_fw);
2113 		if (err) {
2114 			printk(KERN_ERR
2115 			       "cik_fw: validation failed for firmware \"%s\"\n",
2116 			       fw_name);
2117 			goto out;
2118 		} else {
2119 			new_fw++;
2120 		}
2121 	}
2122 
2123 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2124 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2125 	if (err) {
2126 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2127 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2128 		if (err)
2129 			goto out;
2130 		if (rdev->ce_fw->size != ce_req_size) {
2131 			printk(KERN_ERR
2132 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2133 			       rdev->ce_fw->size, fw_name);
2134 			err = -EINVAL;
			goto out;
2135 		}
2136 	} else {
2137 		err = radeon_ucode_validate(rdev->ce_fw);
2138 		if (err) {
2139 			printk(KERN_ERR
2140 			       "cik_fw: validation failed for firmware \"%s\"\n",
2141 			       fw_name);
2142 			goto out;
2143 		} else {
2144 			new_fw++;
2145 		}
2146 	}
2147 
2148 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2149 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2150 	if (err) {
2151 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2152 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2153 		if (err)
2154 			goto out;
2155 		if (rdev->mec_fw->size != mec_req_size) {
2156 			printk(KERN_ERR
2157 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2158 			       rdev->mec_fw->size, fw_name);
2159 			err = -EINVAL;
			goto out;
2160 		}
2161 	} else {
2162 		err = radeon_ucode_validate(rdev->mec_fw);
2163 		if (err) {
2164 			printk(KERN_ERR
2165 			       "cik_fw: validation failed for firmware \"%s\"\n",
2166 			       fw_name);
2167 			goto out;
2168 		} else {
2169 			new_fw++;
2170 		}
2171 	}
2172 
2173 	if (rdev->family == CHIP_KAVERI) {
2174 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2175 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2176 		if (err)
2177 			goto out;
2179 		err = radeon_ucode_validate(rdev->mec2_fw);
2180 		if (err)
2181 			goto out;
2183 		new_fw++;
2186 	}
2187 
2188 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2189 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2190 	if (err) {
2191 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2192 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2193 		if (err)
2194 			goto out;
2195 		if (rdev->rlc_fw->size != rlc_req_size) {
2196 			printk(KERN_ERR
2197 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2198 			       rdev->rlc_fw->size, fw_name);
2199 			err = -EINVAL;
			goto out;
2200 		}
2201 	} else {
2202 		err = radeon_ucode_validate(rdev->rlc_fw);
2203 		if (err) {
2204 			printk(KERN_ERR
2205 			       "cik_fw: validation failed for firmware \"%s\"\n",
2206 			       fw_name);
2207 			goto out;
2208 		} else {
2209 			new_fw++;
2210 		}
2211 	}
2212 
2213 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2214 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2215 	if (err) {
2216 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2217 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2218 		if (err)
2219 			goto out;
2220 		if (rdev->sdma_fw->size != sdma_req_size) {
2221 			printk(KERN_ERR
2222 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2223 			       rdev->sdma_fw->size, fw_name);
2224 			err = -EINVAL;
			goto out;
2225 		}
2226 	} else {
2227 		err = radeon_ucode_validate(rdev->sdma_fw);
2228 		if (err) {
2229 			printk(KERN_ERR
2230 			       "cik_fw: validation failed for firmware \"%s\"\n",
2231 			       fw_name);
2232 			goto out;
2233 		} else {
2234 			new_fw++;
2235 		}
2236 	}
2237 
2238 	/* No SMC, MC ucode on APUs */
2239 	if (!(rdev->flags & RADEON_IS_IGP)) {
2240 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2241 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2242 		if (err) {
2243 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2244 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2245 			if (err) {
2246 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2247 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2248 				if (err)
2249 					goto out;
2250 			}
2251 			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)) {
2253 				printk(KERN_ERR
2254 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2255 				       rdev->mc_fw->size, fw_name);
2256 				err = -EINVAL;
				goto out;
2257 			}
2258 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2259 		} else {
2260 			err = radeon_ucode_validate(rdev->mc_fw);
2261 			if (err) {
2262 				printk(KERN_ERR
2263 				       "cik_fw: validation failed for firmware \"%s\"\n",
2264 				       fw_name);
2265 				goto out;
2266 			} else {
2267 				new_fw++;
2268 			}
2269 		}
2270 
2271 		if (new_smc)
2272 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2273 		else
2274 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2275 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2276 		if (err) {
2277 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2278 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2279 			if (err) {
2280 				printk(KERN_ERR
2281 				       "smc: error loading firmware \"%s\"\n",
2282 				       fw_name);
2283 				release_firmware(rdev->smc_fw);
2284 				rdev->smc_fw = NULL;
2285 				err = 0;
2286 			} else if (rdev->smc_fw->size != smc_req_size) {
2287 				printk(KERN_ERR
2288 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2289 				       rdev->smc_fw->size, fw_name);
2290 				err = -EINVAL;
2291 			}
2292 		} else {
2293 			err = radeon_ucode_validate(rdev->smc_fw);
2294 			if (err) {
2295 				printk(KERN_ERR
2296 				       "cik_fw: validation failed for firmware \"%s\"\n",
2297 				       fw_name);
2298 				goto out;
2299 			} else {
2300 				new_fw++;
2301 			}
2302 		}
2303 	}
2304 
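	/* all or nothing: the load paths parse either the new header-based
	 * layout or the raw legacy layout, so a partial mix of image
	 * generations is treated as an error
	 */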
2305 	if (new_fw == 0) {
2306 		rdev->new_fw = false;
2307 	} else if (new_fw < num_fw) {
2308 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2309 		err = -EINVAL;
2310 	} else {
2311 		rdev->new_fw = true;
2312 	}
2313 
2314 out:
2315 	if (err) {
2316 		if (err != -EINVAL)
2317 			printk(KERN_ERR
2318 			       "cik_cp: Failed to load firmware \"%s\"\n",
2319 			       fw_name);
2320 		release_firmware(rdev->pfp_fw);
2321 		rdev->pfp_fw = NULL;
2322 		release_firmware(rdev->me_fw);
2323 		rdev->me_fw = NULL;
2324 		release_firmware(rdev->ce_fw);
2325 		rdev->ce_fw = NULL;
2326 		release_firmware(rdev->mec_fw);
2327 		rdev->mec_fw = NULL;
2328 		release_firmware(rdev->mec2_fw);
2329 		rdev->mec2_fw = NULL;
2330 		release_firmware(rdev->rlc_fw);
2331 		rdev->rlc_fw = NULL;
2332 		release_firmware(rdev->sdma_fw);
2333 		rdev->sdma_fw = NULL;
2334 		release_firmware(rdev->mc_fw);
2335 		rdev->mc_fw = NULL;
2336 		release_firmware(rdev->smc_fw);
2337 		rdev->smc_fw = NULL;
2338 	}
2339 	return err;
2340 }
2341 
2342 /*
2343  * Core functions
2344  */
2345 /**
2346  * cik_tiling_mode_table_init - init the hw tiling table
2347  *
2348  * @rdev: radeon_device pointer
2349  *
2350  * Starting with SI, the tiling setup is done globally in a
2351  * set of 32 tiling modes.  Rather than selecting each set of
2352  * parameters per surface as on older asics, we just select
2353  * which index in the tiling table we want to use, and the
2354  * surface uses those parameters (CIK).
2355  */
2356 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2357 {
2358 	u32 *tile = rdev->config.cik.tile_mode_array;
2359 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2360 	const u32 num_tile_mode_states =
2361 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2362 	const u32 num_secondary_tile_mode_states =
2363 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2364 	u32 reg_offset, split_equal_to_row_size;
2365 	u32 num_pipe_configs;
2366 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2367 		rdev->config.cik.max_shader_engines;
2368 
2369 	switch (rdev->config.cik.mem_row_size_in_kb) {
2370 	case 1:
2371 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2372 		break;
2373 	case 2:
2374 	default:
2375 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2376 		break;
2377 	case 4:
2378 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2379 		break;
2380 	}
2381 
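	/* quantize to a supported pipe config; asics with more than 8
	 * tile pipes (Hawaii) are programmed with the 16-pipe tables
	 */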
2382 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2383 	if (num_pipe_configs > 8)
2384 		num_pipe_configs = 16;
2385 
2386 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2387 		tile[reg_offset] = 0;
2388 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2389 		macrotile[reg_offset] = 0;
2390 
2391 	switch (num_pipe_configs) {
2392 	case 16:
2393 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2397 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2401 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2403 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2405 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2407 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2409 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 			   TILE_SPLIT(split_equal_to_row_size));
2413 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2418 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2420 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2421 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2422 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 			   TILE_SPLIT(split_equal_to_row_size));
2424 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2425 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2426 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2427 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2429 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2431 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2435 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2436 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2438 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2439 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2441 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2442 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2444 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2449 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2450 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2451 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2452 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2453 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2454 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2455 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2456 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2457 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2459 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2464 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2465 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2466 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2468 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2469 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2470 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2471 
2472 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2474 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2475 			   NUM_BANKS(ADDR_SURF_16_BANK));
2476 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2478 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479 			   NUM_BANKS(ADDR_SURF_16_BANK));
2480 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 			   NUM_BANKS(ADDR_SURF_16_BANK));
2484 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487 			   NUM_BANKS(ADDR_SURF_16_BANK));
2488 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491 			   NUM_BANKS(ADDR_SURF_8_BANK));
2492 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2495 			   NUM_BANKS(ADDR_SURF_4_BANK));
2496 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499 			   NUM_BANKS(ADDR_SURF_2_BANK));
2500 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2502 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2503 			   NUM_BANKS(ADDR_SURF_16_BANK));
2504 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2506 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2507 			   NUM_BANKS(ADDR_SURF_16_BANK));
2508 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511 			    NUM_BANKS(ADDR_SURF_16_BANK));
2512 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515 			    NUM_BANKS(ADDR_SURF_8_BANK));
2516 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519 			    NUM_BANKS(ADDR_SURF_4_BANK));
2520 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2522 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2523 			    NUM_BANKS(ADDR_SURF_2_BANK));
2524 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2526 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2527 			    NUM_BANKS(ADDR_SURF_2_BANK));
2528 
2529 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2530 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2531 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2532 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2533 		break;
2534 
2535 	case 8:
2536 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2540 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2544 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2548 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2550 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2552 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2553 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2554 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555 			   TILE_SPLIT(split_equal_to_row_size));
2556 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2557 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2559 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2561 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2563 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2564 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2565 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566 			   TILE_SPLIT(split_equal_to_row_size));
2567 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2568 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2569 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2570 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2572 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2577 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2578 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2579 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2580 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2581 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2582 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2584 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2585 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2587 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2588 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2589 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2592 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2593 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2594 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2595 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2596 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2597 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2599 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2600 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2602 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2607 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2608 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2609 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2611 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2612 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2614 
2615 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2617 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2618 				NUM_BANKS(ADDR_SURF_16_BANK));
2619 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2621 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622 				NUM_BANKS(ADDR_SURF_16_BANK));
2623 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2626 				NUM_BANKS(ADDR_SURF_16_BANK));
2627 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2630 				NUM_BANKS(ADDR_SURF_16_BANK));
2631 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634 				NUM_BANKS(ADDR_SURF_8_BANK));
2635 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2637 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2638 				NUM_BANKS(ADDR_SURF_4_BANK));
2639 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642 				NUM_BANKS(ADDR_SURF_2_BANK));
2643 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2645 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2646 				NUM_BANKS(ADDR_SURF_16_BANK));
2647 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2649 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2650 				NUM_BANKS(ADDR_SURF_16_BANK));
2651 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2653 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2654 				NUM_BANKS(ADDR_SURF_16_BANK));
2655 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2658 				NUM_BANKS(ADDR_SURF_16_BANK));
2659 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662 				NUM_BANKS(ADDR_SURF_8_BANK));
2663 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2665 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2666 				NUM_BANKS(ADDR_SURF_4_BANK));
2667 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2670 				NUM_BANKS(ADDR_SURF_2_BANK));
2671 
2672 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2673 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2674 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2675 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2676 		break;
2677 
2678 	case 4:
2679 		if (num_rbs == 4) {
2680 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2684 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2688 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2692 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2694 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2696 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2698 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699 			   TILE_SPLIT(split_equal_to_row_size));
2700 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2701 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2703 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2704 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2705 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2707 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2708 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2709 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710 			   TILE_SPLIT(split_equal_to_row_size));
2711 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2712 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2713 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2714 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2716 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2717 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2721 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2722 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2723 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2724 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2725 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2726 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2728 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2729 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2731 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2733 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2736 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2737 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2740 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2741 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2744 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2746 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2747 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2751 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2752 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2753 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2754 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2755 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2756 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2758 
2759 		} else if (num_rbs < 4) {
2760 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2764 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2768 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2772 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2773 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2774 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2775 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2776 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2777 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2778 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2779 			   TILE_SPLIT(split_equal_to_row_size));
2780 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2781 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2783 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2784 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2785 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2787 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2788 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2789 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2790 			   TILE_SPLIT(split_equal_to_row_size));
2791 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2792 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2793 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2794 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2796 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2801 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2802 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2803 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2804 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2805 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2806 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2807 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2808 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2809 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2811 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2813 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2816 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2817 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2818 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2819 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2820 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2821 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2822 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2823 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2824 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2826 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2827 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2828 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2829 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2831 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2832 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2833 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2834 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2835 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2836 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2838 		}
2839 
2840 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843 				NUM_BANKS(ADDR_SURF_16_BANK));
2844 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847 				NUM_BANKS(ADDR_SURF_16_BANK));
2848 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851 				NUM_BANKS(ADDR_SURF_16_BANK));
2852 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855 				NUM_BANKS(ADDR_SURF_16_BANK));
2856 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2859 				NUM_BANKS(ADDR_SURF_16_BANK));
2860 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2862 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2863 				NUM_BANKS(ADDR_SURF_8_BANK));
2864 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2866 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2867 				NUM_BANKS(ADDR_SURF_4_BANK));
2868 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2869 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2870 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871 				NUM_BANKS(ADDR_SURF_16_BANK));
2872 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2873 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2874 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875 				NUM_BANKS(ADDR_SURF_16_BANK));
2876 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2878 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2879 				NUM_BANKS(ADDR_SURF_16_BANK));
2880 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2882 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2883 				NUM_BANKS(ADDR_SURF_16_BANK));
2884 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2887 				NUM_BANKS(ADDR_SURF_16_BANK));
2888 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2889 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2890 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2891 				NUM_BANKS(ADDR_SURF_8_BANK));
2892 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2893 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2894 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2895 				NUM_BANKS(ADDR_SURF_4_BANK));
2896 
2897 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2898 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2899 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2900 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2901 		break;
2902 
2903 	case 2:
2904 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 			   PIPE_CONFIG(ADDR_SURF_P2) |
2907 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2908 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910 			   PIPE_CONFIG(ADDR_SURF_P2) |
2911 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2912 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914 			   PIPE_CONFIG(ADDR_SURF_P2) |
2915 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2916 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2918 			   PIPE_CONFIG(ADDR_SURF_P2) |
2919 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2920 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2921 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2922 			   PIPE_CONFIG(ADDR_SURF_P2) |
2923 			   TILE_SPLIT(split_equal_to_row_size));
2924 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2925 			   PIPE_CONFIG(ADDR_SURF_P2) |
2926 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2927 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2929 			   PIPE_CONFIG(ADDR_SURF_P2) |
2930 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2931 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2932 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2933 			   PIPE_CONFIG(ADDR_SURF_P2) |
2934 			   TILE_SPLIT(split_equal_to_row_size));
2935 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2936 			   PIPE_CONFIG(ADDR_SURF_P2));
2937 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2939 			   PIPE_CONFIG(ADDR_SURF_P2));
2940 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2941 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942 			    PIPE_CONFIG(ADDR_SURF_P2) |
2943 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2945 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2946 			    PIPE_CONFIG(ADDR_SURF_P2) |
2947 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2948 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2949 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950 			    PIPE_CONFIG(ADDR_SURF_P2) |
2951 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2952 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2953 			    PIPE_CONFIG(ADDR_SURF_P2) |
2954 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2955 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2956 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957 			    PIPE_CONFIG(ADDR_SURF_P2) |
2958 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2960 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961 			    PIPE_CONFIG(ADDR_SURF_P2) |
2962 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2964 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965 			    PIPE_CONFIG(ADDR_SURF_P2) |
2966 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2968 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2969 			    PIPE_CONFIG(ADDR_SURF_P2));
2970 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972 			    PIPE_CONFIG(ADDR_SURF_P2) |
2973 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2975 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976 			    PIPE_CONFIG(ADDR_SURF_P2) |
2977 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2978 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2979 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2980 			    PIPE_CONFIG(ADDR_SURF_P2) |
2981 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2982 
2983 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2984 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986 				NUM_BANKS(ADDR_SURF_16_BANK));
2987 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2988 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2989 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990 				NUM_BANKS(ADDR_SURF_16_BANK));
2991 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2993 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994 				NUM_BANKS(ADDR_SURF_16_BANK));
2995 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998 				NUM_BANKS(ADDR_SURF_16_BANK));
2999 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002 				NUM_BANKS(ADDR_SURF_16_BANK));
3003 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3005 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006 				NUM_BANKS(ADDR_SURF_16_BANK));
3007 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3009 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3010 				NUM_BANKS(ADDR_SURF_8_BANK));
3011 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3012 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3013 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014 				NUM_BANKS(ADDR_SURF_16_BANK));
3015 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3016 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018 				NUM_BANKS(ADDR_SURF_16_BANK));
3019 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3020 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3021 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022 				NUM_BANKS(ADDR_SURF_16_BANK));
3023 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3024 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3025 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026 				NUM_BANKS(ADDR_SURF_16_BANK));
3027 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3029 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3030 				NUM_BANKS(ADDR_SURF_16_BANK));
3031 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3033 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3034 				NUM_BANKS(ADDR_SURF_16_BANK));
3035 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3038 				NUM_BANKS(ADDR_SURF_8_BANK));
3039 
3040 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3041 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3042 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3043 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3044 		break;
3045 
3046 	default:
3047 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3048 	}
3049 }
3050 
3051 /**
3052  * cik_select_se_sh - select which SE, SH to address
3053  *
3054  * @rdev: radeon_device pointer
3055  * @se_num: shader engine to address
3056  * @sh_num: sh block to address
3057  *
3058  * Select which SE, SH combinations to address. Certain
3059  * registers are instanced per SE or SH.  0xffffffff means
3060  * broadcast to all SEs or SHs (CIK).
3061  */
3062 static void cik_select_se_sh(struct radeon_device *rdev,
3063 			     u32 se_num, u32 sh_num)
3064 {
3065 	u32 data = INSTANCE_BROADCAST_WRITES;
3066 
3067 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3068 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3069 	else if (se_num == 0xffffffff)
3070 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3071 	else if (sh_num == 0xffffffff)
3072 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3073 	else
3074 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3075 	WREG32(GRBM_GFX_INDEX, data);
3076 }
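
/* e.g. cik_select_se_sh(rdev, 0xffffffff, 0xffffffff) restores broadcast
 * mode after a run of per-SE/SH accesses; callers hold
 * rdev->grbm_idx_mutex around such sequences (see cik_setup_rb())
 */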
3077 
3078 /**
3079  * cik_create_bitmask - create a bitmask
3080  *
3081  * @bit_width: length of the mask
3082  *
3083  * create a variable length bit mask (CIK).
3084  * Returns the bitmask.
3085  */
3086 static u32 cik_create_bitmask(u32 bit_width)
3087 {
3088 	u32 i, mask = 0;
3089 
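	/* e.g. bit_width = 4 yields 0xf; equivalent to
	 * ((1 << bit_width) - 1) for bit_width < 32
	 */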
3090 	for (i = 0; i < bit_width; i++) {
3091 		mask <<= 1;
3092 		mask |= 1;
3093 	}
3094 	return mask;
3095 }
3096 
3097 /**
3098  * cik_get_rb_disabled - computes the mask of disabled RBs
3099  *
3100  * @rdev: radeon_device pointer
3101  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3103  * @sh_per_se: number of SH blocks per SE for the asic
3104  *
3105  * Calculates the bitmask of disabled RBs (CIK).
3106  * Returns the disabled RB bitmask.
3107  */
3108 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3109 			      u32 max_rb_num_per_se,
3110 			      u32 sh_per_se)
3111 {
3112 	u32 data, mask;
3113 
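	/* merge the hw fuse-disabled RBs with any user-disabled RBs, then
	 * mask the result down to the RBs belonging to a single SH
	 */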
3114 	data = RREG32(CC_RB_BACKEND_DISABLE);
3115 	if (data & 1)
3116 		data &= BACKEND_DISABLE_MASK;
3117 	else
3118 		data = 0;
3119 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3120 
3121 	data >>= BACKEND_DISABLE_SHIFT;
3122 
3123 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3124 
3125 	return data & mask;
3126 }
3127 
3128 /**
3129  * cik_setup_rb - setup the RBs on the asic
3130  *
3131  * @rdev: radeon_device pointer
3132  * @se_num: number of SEs (shader engines) for the asic
3133  * @sh_per_se: number of SH blocks per SE for the asic
3134  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3135  *
3136  * Configures per-SE/SH RB registers (CIK).
3137  */
3138 static void cik_setup_rb(struct radeon_device *rdev,
3139 			 u32 se_num, u32 sh_per_se,
3140 			 u32 max_rb_num_per_se)
3141 {
3142 	int i, j;
3143 	u32 data, mask;
3144 	u32 disabled_rbs = 0;
3145 	u32 enabled_rbs = 0;
3146 
3147 	mutex_lock(&rdev->grbm_idx_mutex);
3148 	for (i = 0; i < se_num; i++) {
3149 		for (j = 0; j < sh_per_se; j++) {
3150 			cik_select_se_sh(rdev, i, j);
3151 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3152 			if (rdev->family == CHIP_HAWAII)
3153 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3154 			else
3155 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3156 		}
3157 	}
3158 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3159 	mutex_unlock(&rdev->grbm_idx_mutex);
3160 
3161 	mask = 1;
3162 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3163 		if (!(disabled_rbs & mask))
3164 			enabled_rbs |= mask;
3165 		mask <<= 1;
3166 	}
3167 
3168 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3169 
3170 	mutex_lock(&rdev->grbm_idx_mutex);
3171 	for (i = 0; i < se_num; i++) {
3172 		cik_select_se_sh(rdev, i, 0xffffffff);
3173 		data = 0;
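		/* enabled_rbs is consumed two bits at a time; each 2-bit field
		 * selects the RASTER_CONFIG RB mapping for one SH in this SE
		 */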
3174 		for (j = 0; j < sh_per_se; j++) {
3175 			switch (enabled_rbs & 3) {
3176 			case 0:
3177 				if (j == 0)
3178 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3179 				else
3180 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3181 				break;
3182 			case 1:
3183 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3184 				break;
3185 			case 2:
3186 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3187 				break;
3188 			case 3:
3189 			default:
3190 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3191 				break;
3192 			}
3193 			enabled_rbs >>= 2;
3194 		}
3195 		WREG32(PA_SC_RASTER_CONFIG, data);
3196 	}
3197 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3198 	mutex_unlock(&rdev->grbm_idx_mutex);
3199 }
3200 
3201 /**
3202  * cik_gpu_init - setup the 3D engine
3203  *
3204  * @rdev: radeon_device pointer
3205  *
3206  * Configures the 3D engine and tiling configuration
3207  * registers so that the 3D engine is usable.
3208  */
3209 static void cik_gpu_init(struct radeon_device *rdev)
3210 {
3211 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3212 	u32 mc_shared_chmap, mc_arb_ramcfg;
3213 	u32 hdp_host_path_cntl;
3214 	u32 tmp;
3215 	int i, j;
3216 
3217 	switch (rdev->family) {
3218 	case CHIP_BONAIRE:
3219 		rdev->config.cik.max_shader_engines = 2;
3220 		rdev->config.cik.max_tile_pipes = 4;
3221 		rdev->config.cik.max_cu_per_sh = 7;
3222 		rdev->config.cik.max_sh_per_se = 1;
3223 		rdev->config.cik.max_backends_per_se = 2;
3224 		rdev->config.cik.max_texture_channel_caches = 4;
3225 		rdev->config.cik.max_gprs = 256;
3226 		rdev->config.cik.max_gs_threads = 32;
3227 		rdev->config.cik.max_hw_contexts = 8;
3228 
3229 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3230 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3231 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3232 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3233 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3234 		break;
3235 	case CHIP_HAWAII:
3236 		rdev->config.cik.max_shader_engines = 4;
3237 		rdev->config.cik.max_tile_pipes = 16;
3238 		rdev->config.cik.max_cu_per_sh = 11;
3239 		rdev->config.cik.max_sh_per_se = 1;
3240 		rdev->config.cik.max_backends_per_se = 4;
3241 		rdev->config.cik.max_texture_channel_caches = 16;
3242 		rdev->config.cik.max_gprs = 256;
3243 		rdev->config.cik.max_gs_threads = 32;
3244 		rdev->config.cik.max_hw_contexts = 8;
3245 
3246 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3247 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3248 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3249 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3250 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3251 		break;
3252 	case CHIP_KAVERI:
3253 		rdev->config.cik.max_shader_engines = 1;
3254 		rdev->config.cik.max_tile_pipes = 4;
3255 		if ((rdev->pdev->device == 0x1304) ||
3256 		    (rdev->pdev->device == 0x1305) ||
3257 		    (rdev->pdev->device == 0x130C) ||
3258 		    (rdev->pdev->device == 0x130F) ||
3259 		    (rdev->pdev->device == 0x1310) ||
3260 		    (rdev->pdev->device == 0x1311) ||
3261 		    (rdev->pdev->device == 0x131C)) {
3262 			rdev->config.cik.max_cu_per_sh = 8;
3263 			rdev->config.cik.max_backends_per_se = 2;
3264 		} else if ((rdev->pdev->device == 0x1309) ||
3265 			   (rdev->pdev->device == 0x130A) ||
3266 			   (rdev->pdev->device == 0x130D) ||
3267 			   (rdev->pdev->device == 0x1313) ||
3268 			   (rdev->pdev->device == 0x131D)) {
3269 			rdev->config.cik.max_cu_per_sh = 6;
3270 			rdev->config.cik.max_backends_per_se = 2;
3271 		} else if ((rdev->pdev->device == 0x1306) ||
3272 			   (rdev->pdev->device == 0x1307) ||
3273 			   (rdev->pdev->device == 0x130B) ||
3274 			   (rdev->pdev->device == 0x130E) ||
3275 			   (rdev->pdev->device == 0x1315) ||
3276 			   (rdev->pdev->device == 0x1318) ||
3277 			   (rdev->pdev->device == 0x131B)) {
3278 			rdev->config.cik.max_cu_per_sh = 4;
3279 			rdev->config.cik.max_backends_per_se = 1;
3280 		} else {
3281 			rdev->config.cik.max_cu_per_sh = 3;
3282 			rdev->config.cik.max_backends_per_se = 1;
3283 		}
3284 		rdev->config.cik.max_sh_per_se = 1;
3285 		rdev->config.cik.max_texture_channel_caches = 4;
3286 		rdev->config.cik.max_gprs = 256;
3287 		rdev->config.cik.max_gs_threads = 16;
3288 		rdev->config.cik.max_hw_contexts = 8;
3289 
3290 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3291 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3292 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3293 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3294 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3295 		break;
3296 	case CHIP_KABINI:
3297 	case CHIP_MULLINS:
3298 	default:
3299 		rdev->config.cik.max_shader_engines = 1;
3300 		rdev->config.cik.max_tile_pipes = 2;
3301 		rdev->config.cik.max_cu_per_sh = 2;
3302 		rdev->config.cik.max_sh_per_se = 1;
3303 		rdev->config.cik.max_backends_per_se = 1;
3304 		rdev->config.cik.max_texture_channel_caches = 2;
3305 		rdev->config.cik.max_gprs = 256;
3306 		rdev->config.cik.max_gs_threads = 16;
3307 		rdev->config.cik.max_hw_contexts = 8;
3308 
3309 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3310 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3311 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3312 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3313 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3314 		break;
3315 	}
3316 
3317 	/* Initialize HDP */
3318 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3319 		WREG32((0x2c14 + j), 0x00000000);
3320 		WREG32((0x2c18 + j), 0x00000000);
3321 		WREG32((0x2c1c + j), 0x00000000);
3322 		WREG32((0x2c20 + j), 0x00000000);
3323 		WREG32((0x2c24 + j), 0x00000000);
3324 	}
3325 
3326 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3327 	WREG32(SRBM_INT_CNTL, 0x1);
3328 	WREG32(SRBM_INT_ACK, 0x1);
3329 
3330 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3331 
3332 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3333 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3334 
3335 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3336 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3337 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
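	/* row size in bytes = 4 * 2^(8 + noofcols); convert to KB and clamp
	 * to the 4 KB maximum that the ROW_SIZE field below can encode
	 */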
3338 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3339 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3340 		rdev->config.cik.mem_row_size_in_kb = 4;
3341 	/* XXX use MC settings? */
3342 	rdev->config.cik.shader_engine_tile_size = 32;
3343 	rdev->config.cik.num_gpus = 1;
3344 	rdev->config.cik.multi_gpu_tile_size = 64;
3345 
3346 	/* fix up row size */
3347 	gb_addr_config &= ~ROW_SIZE_MASK;
3348 	switch (rdev->config.cik.mem_row_size_in_kb) {
3349 	case 1:
3350 	default:
3351 		gb_addr_config |= ROW_SIZE(0);
3352 		break;
3353 	case 2:
3354 		gb_addr_config |= ROW_SIZE(1);
3355 		break;
3356 	case 4:
3357 		gb_addr_config |= ROW_SIZE(2);
3358 		break;
3359 	}
3360 
3361 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3362 	 * not have bank info, so create a custom tiling dword.
3363 	 * bits 3:0   num_pipes
3364 	 * bits 7:4   num_banks
3365 	 * bits 11:8  group_size
3366 	 * bits 15:12 row_size
3367 	 */
3368 	rdev->config.cik.tile_config = 0;
3369 	switch (rdev->config.cik.num_tile_pipes) {
3370 	case 1:
3371 		rdev->config.cik.tile_config |= (0 << 0);
3372 		break;
3373 	case 2:
3374 		rdev->config.cik.tile_config |= (1 << 0);
3375 		break;
3376 	case 4:
3377 		rdev->config.cik.tile_config |= (2 << 0);
3378 		break;
3379 	case 8:
3380 	default:
3381 		/* XXX what about 12? */
3382 		rdev->config.cik.tile_config |= (3 << 0);
3383 		break;
3384 	}
3385 	rdev->config.cik.tile_config |=
3386 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3387 	rdev->config.cik.tile_config |=
3388 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3389 	rdev->config.cik.tile_config |=
3390 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3391 
3392 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3393 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3394 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3395 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3396 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3397 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3398 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3399 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3400 
3401 	cik_tiling_mode_table_init(rdev);
3402 
3403 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3404 		     rdev->config.cik.max_sh_per_se,
3405 		     rdev->config.cik.max_backends_per_se);
3406 
3407 	rdev->config.cik.active_cus = 0;
3408 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3409 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3410 			rdev->config.cik.active_cus +=
3411 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3412 		}
3413 	}
3414 
3415 	/* set HW defaults for 3D engine */
3416 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3417 
3418 	mutex_lock(&rdev->grbm_idx_mutex);
3419 	/*
3420 	 * make sure that the following register writes will be broadcast
3421 	 * to all the shaders
3422 	 */
3423 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3424 	WREG32(SX_DEBUG_1, 0x20);
3425 
3426 	WREG32(TA_CNTL_AUX, 0x00010000);
3427 
3428 	tmp = RREG32(SPI_CONFIG_CNTL);
3429 	tmp |= 0x03000000;
3430 	WREG32(SPI_CONFIG_CNTL, tmp);
3431 
3432 	WREG32(SQ_CONFIG, 1);
3433 
3434 	WREG32(DB_DEBUG, 0);
3435 
3436 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3437 	tmp |= 0x00000400;
3438 	WREG32(DB_DEBUG2, tmp);
3439 
3440 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3441 	tmp |= 0x00020200;
3442 	WREG32(DB_DEBUG3, tmp);
3443 
3444 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3445 	tmp |= 0x00018208;
3446 	WREG32(CB_HW_CONTROL, tmp);
3447 
3448 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3449 
3450 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3451 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3452 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3453 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3454 
3455 	WREG32(VGT_NUM_INSTANCES, 1);
3456 
3457 	WREG32(CP_PERFMON_CNTL, 0);
3458 
3459 	WREG32(SQ_CONFIG, 0);
3460 
3461 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3462 					  FORCE_EOV_MAX_REZ_CNT(255)));
3463 
3464 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3465 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3466 
3467 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3468 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3469 
3470 	tmp = RREG32(HDP_MISC_CNTL);
3471 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3472 	WREG32(HDP_MISC_CNTL, tmp);
3473 
3474 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3475 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3476 
3477 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3478 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3479 	mutex_unlock(&rdev->grbm_idx_mutex);
3480 
3481 	udelay(50);
3482 }
3483 
3484 /*
3485  * GPU scratch registers helpers function.
3486  */
3487 /**
3488  * cik_scratch_init - setup driver info for CP scratch regs
3489  *
3490  * @rdev: radeon_device pointer
3491  *
3492  * Set up the number and offset of the CP scratch registers.
3493  * NOTE: use of CP scratch registers is a legacy interface and
3494  * is not used by default on newer asics (r6xx+).  On newer asics,
3495  * memory buffers are used for fences rather than scratch regs.
3496  */
3497 static void cik_scratch_init(struct radeon_device *rdev)
3498 {
3499 	int i;
3500 
3501 	rdev->scratch.num_reg = 7;
3502 	rdev->scratch.reg_base = SCRATCH_REG0;
3503 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3504 		rdev->scratch.free[i] = true;
3505 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3506 	}
3507 }
3508 
3509 /**
3510  * cik_ring_test - basic gfx ring test
3511  *
3512  * @rdev: radeon_device pointer
3513  * @ring: radeon_ring structure holding ring information
3514  *
3515  * Allocate a scratch register and write to it using the gfx ring (CIK).
3516  * Provides a basic gfx ring test to verify that the ring is working.
3517  * Used by cik_cp_gfx_resume().
3518  * Returns 0 on success, error on failure.
3519  */
3520 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3521 {
3522 	uint32_t scratch;
3523 	uint32_t tmp = 0;
3524 	unsigned i;
3525 	int r;
3526 
3527 	r = radeon_scratch_get(rdev, &scratch);
3528 	if (r) {
3529 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3530 		return r;
3531 	}
3532 	WREG32(scratch, 0xCAFEDEAD);
3533 	r = radeon_ring_lock(rdev, ring, 3);
3534 	if (r) {
3535 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3536 		radeon_scratch_free(rdev, scratch);
3537 		return r;
3538 	}
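	/* 3 dwords: SET_UCONFIG_REG header, register offset (in dwords,
	 * relative to PACKET3_SET_UCONFIG_REG_START), and the test value
	 */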
3539 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3540 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3541 	radeon_ring_write(ring, 0xDEADBEEF);
3542 	radeon_ring_unlock_commit(rdev, ring, false);
3543 
3544 	for (i = 0; i < rdev->usec_timeout; i++) {
3545 		tmp = RREG32(scratch);
3546 		if (tmp == 0xDEADBEEF)
3547 			break;
3548 		DRM_UDELAY(1);
3549 	}
3550 	if (i < rdev->usec_timeout) {
3551 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3552 	} else {
3553 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3554 			  ring->idx, scratch, tmp);
3555 		r = -EINVAL;
3556 	}
3557 	radeon_scratch_free(rdev, scratch);
3558 	return r;
3559 }
3560 
3561 /**
3562  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3563  *
3564  * @rdev: radeon_device pointer
3565  * @ridx: radeon ring index
3566  *
3567  * Emits an hdp flush on the cp.
3568  */
3569 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3570 				       int ridx)
3571 {
3572 	struct radeon_ring *ring = &rdev->ring[ridx];
3573 	u32 ref_and_mask;
3574 
3575 	switch (ring->idx) {
3576 	case CAYMAN_RING_TYPE_CP1_INDEX:
3577 	case CAYMAN_RING_TYPE_CP2_INDEX:
3578 	default:
3579 		switch (ring->me) {
3580 		case 0:
3581 			ref_and_mask = CP2 << ring->pipe;
3582 			break;
3583 		case 1:
3584 			ref_and_mask = CP6 << ring->pipe;
3585 			break;
3586 		default:
3587 			return;
3588 		}
3589 		break;
3590 	case RADEON_RING_TYPE_GFX_INDEX:
3591 		ref_and_mask = CP0;
3592 		break;
3593 	}
3594 
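	/* write ref_and_mask to GPU_HDP_FLUSH_REQ, then poll GPU_HDP_FLUSH_DONE
	 * (masked with ref_and_mask) from the PFP until the two match
	 */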
3595 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3596 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3597 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3598 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3599 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3600 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3601 	radeon_ring_write(ring, ref_and_mask);
3602 	radeon_ring_write(ring, ref_and_mask);
3603 	radeon_ring_write(ring, 0x20); /* poll interval */
3604 }
3605 
3606 /**
3607  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3608  *
3609  * @rdev: radeon_device pointer
3610  * @fence: radeon fence object
3611  *
3612  * Emits a fence sequence number on the gfx ring and flushes
3613  * GPU caches.
3614  */
3615 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3616 			     struct radeon_fence *fence)
3617 {
3618 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3619 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3620 
3621 	/* Workaround for cache flush problems. First send a dummy EOP
3622 	 * event down the pipe with a sequence number one below the real one.
3623 	 */
3624 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3625 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3626 				 EOP_TC_ACTION_EN |
3627 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3628 				 EVENT_INDEX(5)));
3629 	radeon_ring_write(ring, addr & 0xfffffffc);
3630 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3631 				DATA_SEL(1) | INT_SEL(0));
3632 	radeon_ring_write(ring, fence->seq - 1);
3633 	radeon_ring_write(ring, 0);
3634 
3635 	/* Then send the real EOP event down the pipe. */
3636 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3637 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3638 				 EOP_TC_ACTION_EN |
3639 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3640 				 EVENT_INDEX(5)));
3641 	radeon_ring_write(ring, addr & 0xfffffffc);
3642 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3643 	radeon_ring_write(ring, fence->seq);
3644 	radeon_ring_write(ring, 0);
3645 }
3646 
3647 /**
3648  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3649  *
3650  * @rdev: radeon_device pointer
3651  * @fence: radeon fence object
3652  *
3653  * Emits a fence sequence number on the compute ring and flushes
3654  * GPU caches.
3655  */
3656 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3657 				 struct radeon_fence *fence)
3658 {
3659 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3660 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3661 
3662 	/* RELEASE_MEM - flush caches, send int */
3663 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3664 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3665 				 EOP_TC_ACTION_EN |
3666 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3667 				 EVENT_INDEX(5)));
3668 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3669 	radeon_ring_write(ring, addr & 0xfffffffc);
3670 	radeon_ring_write(ring, upper_32_bits(addr));
3671 	radeon_ring_write(ring, fence->seq);
3672 	radeon_ring_write(ring, 0);
3673 }
3674 
3675 /**
3676  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3677  *
3678  * @rdev: radeon_device pointer
3679  * @ring: radeon ring buffer object
3680  * @semaphore: radeon semaphore object
3681  * @emit_wait: Is this a semaphore wait?
3682  *
3683  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3684  * from running ahead of semaphore waits.
3685  */
3686 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3687 			     struct radeon_ring *ring,
3688 			     struct radeon_semaphore *semaphore,
3689 			     bool emit_wait)
3690 {
3691 	uint64_t addr = semaphore->gpu_addr;
3692 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3693 
3694 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3695 	radeon_ring_write(ring, lower_32_bits(addr));
3696 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3697 
3698 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3699 		/* Prevent the PFP from running ahead of the semaphore wait */
3700 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3701 		radeon_ring_write(ring, 0x0);
3702 	}
3703 
3704 	return true;
3705 }
3706 
3707 /**
3708  * cik_copy_cpdma - copy pages using the CP DMA engine
3709  *
3710  * @rdev: radeon_device pointer
3711  * @src_offset: src GPU address
3712  * @dst_offset: dst GPU address
3713  * @num_gpu_pages: number of GPU pages to xfer
3714  * @resv: reservation object to sync to
3715  *
3716  * Copy GPU pages using the CP DMA engine (CIK+).
3717  * Used by the radeon ttm implementation to move pages if
3718  * registered as the asic copy callback.
3719  */
3720 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3721 				    uint64_t src_offset, uint64_t dst_offset,
3722 				    unsigned num_gpu_pages,
3723 				    struct reservation_object *resv)
3724 {
3725 	struct radeon_fence *fence;
3726 	struct radeon_sync sync;
3727 	int ring_index = rdev->asic->copy.blit_ring_index;
3728 	struct radeon_ring *ring = &rdev->ring[ring_index];
3729 	u32 size_in_bytes, cur_size_in_bytes, control;
3730 	int i, num_loops;
3731 	int r = 0;
3732 
3733 	radeon_sync_create(&sync);
3734 
3735 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3736 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3737 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3738 	if (r) {
3739 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3740 		radeon_sync_free(rdev, &sync, NULL);
3741 		return ERR_PTR(r);
3742 	}
3743 
3744 	radeon_sync_resv(rdev, &sync, resv, false);
3745 	radeon_sync_rings(rdev, &sync, ring->idx);
3746 
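	/* DMA_DATA moves at most 0x1fffff bytes per packet, so split the copy
	 * into chunks; only the last packet sets CP_SYNC so the CP stalls
	 * until the transfer completes
	 */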
3747 	for (i = 0; i < num_loops; i++) {
3748 		cur_size_in_bytes = size_in_bytes;
3749 		if (cur_size_in_bytes > 0x1fffff)
3750 			cur_size_in_bytes = 0x1fffff;
3751 		size_in_bytes -= cur_size_in_bytes;
3752 		control = 0;
3753 		if (size_in_bytes == 0)
3754 			control |= PACKET3_DMA_DATA_CP_SYNC;
3755 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3756 		radeon_ring_write(ring, control);
3757 		radeon_ring_write(ring, lower_32_bits(src_offset));
3758 		radeon_ring_write(ring, upper_32_bits(src_offset));
3759 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3760 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3761 		radeon_ring_write(ring, cur_size_in_bytes);
3762 		src_offset += cur_size_in_bytes;
3763 		dst_offset += cur_size_in_bytes;
3764 	}
3765 
3766 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3767 	if (r) {
3768 		radeon_ring_unlock_undo(rdev, ring);
3769 		radeon_sync_free(rdev, &sync, NULL);
3770 		return ERR_PTR(r);
3771 	}
3772 
3773 	radeon_ring_unlock_commit(rdev, ring, false);
3774 	radeon_sync_free(rdev, &sync, fence);
3775 
3776 	return fence;
3777 }
3778 
3779 /*
3780  * IB stuff
3781  */
3782 /**
3783  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3784  *
3785  * @rdev: radeon_device pointer
3786  * @ib: radeon indirect buffer object
3787  *
3788  * Emits a DE (drawing engine) or CE (constant engine) IB
3789  * on the gfx ring.  IBs are usually generated by userspace
3790  * acceleration drivers and submitted to the kernel for
3791  * scheduling on the ring.  This function schedules the IB
3792  * on the gfx ring for execution by the GPU.
3793  */
3794 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3795 {
3796 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3797 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3798 	u32 header, control = INDIRECT_BUFFER_VALID;
3799 
3800 	if (ib->is_const_ib) {
3801 		/* set switch buffer packet before const IB */
3802 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3803 		radeon_ring_write(ring, 0);
3804 
3805 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3806 	} else {
3807 		u32 next_rptr;
3808 		if (ring->rptr_save_reg) {
3809 			next_rptr = ring->wptr + 3 + 4;
3810 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3811 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3812 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3813 			radeon_ring_write(ring, next_rptr);
3814 		} else if (rdev->wb.enabled) {
3815 			next_rptr = ring->wptr + 5 + 4;
3816 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3817 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3818 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3819 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3820 			radeon_ring_write(ring, next_rptr);
3821 		}
3822 
3823 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3824 	}
3825 
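	/* control dword: IB length in dwords in the low bits, VMID at bit 24 */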
3826 	control |= ib->length_dw | (vm_id << 24);
3827 
3828 	radeon_ring_write(ring, header);
3829 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3830 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3831 	radeon_ring_write(ring, control);
3832 }
3833 
3834 /**
3835  * cik_ib_test - basic gfx ring IB test
3836  *
3837  * @rdev: radeon_device pointer
3838  * @ring: radeon_ring structure holding ring information
3839  *
3840  * Allocate an IB and execute it on the gfx ring (CIK).
3841  * Provides a basic gfx ring test to verify that IBs are working.
3842  * Returns 0 on success, error on failure.
3843  */
3844 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3845 {
3846 	struct radeon_ib ib;
3847 	uint32_t scratch;
3848 	uint32_t tmp = 0;
3849 	unsigned i;
3850 	int r;
3851 
3852 	r = radeon_scratch_get(rdev, &scratch);
3853 	if (r) {
3854 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3855 		return r;
3856 	}
3857 	WREG32(scratch, 0xCAFEDEAD);
3858 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3859 	if (r) {
3860 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3861 		radeon_scratch_free(rdev, scratch);
3862 		return r;
3863 	}
3864 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3865 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3866 	ib.ptr[2] = 0xDEADBEEF;
3867 	ib.length_dw = 3;
3868 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3869 	if (r) {
3870 		radeon_scratch_free(rdev, scratch);
3871 		radeon_ib_free(rdev, &ib);
3872 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3873 		return r;
3874 	}
3875 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3876 		RADEON_USEC_IB_TEST_TIMEOUT));
3877 	if (r < 0) {
3878 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3879 		radeon_scratch_free(rdev, scratch);
3880 		radeon_ib_free(rdev, &ib);
3881 		return r;
3882 	} else if (r == 0) {
3883 		DRM_ERROR("radeon: fence wait timed out.\n");
3884 		radeon_scratch_free(rdev, scratch);
3885 		radeon_ib_free(rdev, &ib);
3886 		return -ETIMEDOUT;
3887 	}
3888 	r = 0;
3889 	for (i = 0; i < rdev->usec_timeout; i++) {
3890 		tmp = RREG32(scratch);
3891 		if (tmp == 0xDEADBEEF)
3892 			break;
3893 		DRM_UDELAY(1);
3894 	}
3895 	if (i < rdev->usec_timeout) {
3896 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3897 	} else {
3898 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3899 			  scratch, tmp);
3900 		r = -EINVAL;
3901 	}
3902 	radeon_scratch_free(rdev, scratch);
3903 	radeon_ib_free(rdev, &ib);
3904 	return r;
3905 }
3906 
3907 /*
3908  * CP.
3909  * On CIK, gfx and compute now have independent command processors.
3910  *
3911  * GFX
3912  * Gfx consists of a single ring and can process both gfx jobs and
3913  * compute jobs.  The gfx CP consists of three microengines (ME):
3914  * PFP - Pre-Fetch Parser
3915  * ME - Micro Engine
3916  * CE - Constant Engine
3917  * The PFP and ME make up what is considered the Drawing Engine (DE).
3918  * The CE is an asynchronous engine used for updating buffer descriptors
3919  * used by the DE so that they can be loaded into cache in parallel
3920  * while the DE is processing state update packets.
3921  *
3922  * Compute
3923  * The compute CP consists of two microengines (ME):
3924  * MEC1 - Compute MicroEngine 1
3925  * MEC2 - Compute MicroEngine 2
3926  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3927  * The queues are exposed to userspace and are programmed directly
3928  * by the compute runtime.
3929  */
3930 /**
3931  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3932  *
3933  * @rdev: radeon_device pointer
3934  * @enable: enable or disable the MEs
3935  *
3936  * Halts or unhalts the gfx MEs.
3937  */
3938 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3939 {
3940 	if (enable)
3941 		WREG32(CP_ME_CNTL, 0);
3942 	else {
3943 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3944 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3945 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3946 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3947 	}
3948 	udelay(50);
3949 }
3950 
3951 /**
3952  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3953  *
3954  * @rdev: radeon_device pointer
3955  *
3956  * Loads the gfx PFP, ME, and CE ucode.
3957  * Returns 0 for success, -EINVAL if the ucode is not available.
3958  */
3959 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3960 {
3961 	int i;
3962 
3963 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3964 		return -EINVAL;
3965 
3966 	cik_cp_gfx_enable(rdev, false);
3967 
3968 	if (rdev->new_fw) {
3969 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3970 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3971 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3972 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3973 		const struct gfx_firmware_header_v1_0 *me_hdr =
3974 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3975 		const __le32 *fw_data;
3976 		u32 fw_size;
3977 
3978 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3979 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3980 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3981 
3982 		/* PFP */
3983 		fw_data = (const __le32 *)
3984 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3985 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3986 		WREG32(CP_PFP_UCODE_ADDR, 0);
3987 		for (i = 0; i < fw_size; i++)
3988 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3989 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3990 
3991 		/* CE */
3992 		fw_data = (const __le32 *)
3993 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3994 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3995 		WREG32(CP_CE_UCODE_ADDR, 0);
3996 		for (i = 0; i < fw_size; i++)
3997 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3998 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3999 
4000 		/* ME */
4001 		fw_data = (const __le32 *)
4002 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4003 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4004 		WREG32(CP_ME_RAM_WADDR, 0);
4005 		for (i = 0; i < fw_size; i++)
4006 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4007 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4008 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4009 	} else {
4010 		const __be32 *fw_data;
4011 
4012 		/* PFP */
4013 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4014 		WREG32(CP_PFP_UCODE_ADDR, 0);
4015 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4016 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4017 		WREG32(CP_PFP_UCODE_ADDR, 0);
4018 
4019 		/* CE */
4020 		fw_data = (const __be32 *)rdev->ce_fw->data;
4021 		WREG32(CP_CE_UCODE_ADDR, 0);
4022 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4023 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4024 		WREG32(CP_CE_UCODE_ADDR, 0);
4025 
4026 		/* ME */
4027 		fw_data = (const __be32 *)rdev->me_fw->data;
4028 		WREG32(CP_ME_RAM_WADDR, 0);
4029 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4030 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4031 		WREG32(CP_ME_RAM_WADDR, 0);
4032 	}
4033 
4034 	return 0;
4035 }
4036 
4037 /**
4038  * cik_cp_gfx_start - start the gfx ring
4039  *
4040  * @rdev: radeon_device pointer
4041  *
4042  * Enables the ring and loads the clear state context and other
4043  * packets required to init the ring.
4044  * Returns 0 for success, error for failure.
4045  */
4046 static int cik_cp_gfx_start(struct radeon_device *rdev)
4047 {
4048 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4049 	int r, i;
4050 
4051 	/* init the CP */
4052 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4053 	WREG32(CP_ENDIAN_SWAP, 0);
4054 	WREG32(CP_DEVICE_ID, 1);
4055 
4056 	cik_cp_gfx_enable(rdev, true);
4057 
4058 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4059 	if (r) {
4060 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4061 		return r;
4062 	}
4063 
4064 	/* init the CE partitions.  CE only used for gfx on CIK */
4065 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4066 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4067 	radeon_ring_write(ring, 0x8000);
4068 	radeon_ring_write(ring, 0x8000);
4069 
4070 	/* setup clear context state */
4071 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4072 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4073 
4074 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4075 	radeon_ring_write(ring, 0x80000000);
4076 	radeon_ring_write(ring, 0x80000000);
4077 
4078 	for (i = 0; i < cik_default_size; i++)
4079 		radeon_ring_write(ring, cik_default_state[i]);
4080 
4081 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4082 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4083 
4084 	/* set clear context state */
4085 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4086 	radeon_ring_write(ring, 0);
4087 
4088 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4089 	radeon_ring_write(ring, 0x00000316);
4090 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4091 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4092 
4093 	radeon_ring_unlock_commit(rdev, ring, false);
4094 
4095 	return 0;
4096 }
4097 
4098 /**
4099  * cik_cp_gfx_fini - stop the gfx ring
4100  *
4101  * @rdev: radeon_device pointer
4102  *
4103  * Stop the gfx ring and tear down the driver ring
4104  * info.
4105  */
4106 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4107 {
4108 	cik_cp_gfx_enable(rdev, false);
4109 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4110 }
4111 
4112 /**
4113  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4114  *
4115  * @rdev: radeon_device pointer
4116  *
4117  * Program the location and size of the gfx ring buffer
4118  * and test it to make sure it's working.
4119  * Returns 0 for success, error for failure.
4120  */
4121 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4122 {
4123 	struct radeon_ring *ring;
4124 	u32 tmp;
4125 	u32 rb_bufsz;
4126 	u64 rb_addr;
4127 	int r;
4128 
4129 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4130 	if (rdev->family != CHIP_HAWAII)
4131 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4132 
4133 	/* Set the write pointer delay */
4134 	WREG32(CP_RB_WPTR_DELAY, 0);
4135 
4136 	/* set the RB to use vmid 0 */
4137 	WREG32(CP_RB_VMID, 0);
4138 
4139 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4140 
4141 	/* ring 0 - compute and gfx */
4142 	/* Set ring buffer size */
4143 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
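	/* RB size is programmed as log2 of the ring size in 8-byte quadwords */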
4144 	rb_bufsz = order_base_2(ring->ring_size / 8);
4145 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4146 #ifdef __BIG_ENDIAN
4147 	tmp |= BUF_SWAP_32BIT;
4148 #endif
4149 	WREG32(CP_RB0_CNTL, tmp);
4150 
4151 	/* Initialize the ring buffer's read and write pointers */
4152 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4153 	ring->wptr = 0;
4154 	WREG32(CP_RB0_WPTR, ring->wptr);
4155 
4156 	/* set the wb address whether it's enabled or not */
4157 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4158 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4159 
4160 	/* scratch register shadowing is no longer supported */
4161 	WREG32(SCRATCH_UMSK, 0);
4162 
4163 	if (!rdev->wb.enabled)
4164 		tmp |= RB_NO_UPDATE;
4165 
4166 	mdelay(1);
4167 	WREG32(CP_RB0_CNTL, tmp);
4168 
4169 	rb_addr = ring->gpu_addr >> 8;
4170 	WREG32(CP_RB0_BASE, rb_addr);
4171 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4172 
4173 	/* start the ring */
4174 	cik_cp_gfx_start(rdev);
4175 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4176 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4177 	if (r) {
4178 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4179 		return r;
4180 	}
4181 
4182 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4183 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4184 
4185 	return 0;
4186 }
4187 
4188 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4189 		     struct radeon_ring *ring)
4190 {
4191 	u32 rptr;
4192 
4193 	if (rdev->wb.enabled)
4194 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4195 	else
4196 		rptr = RREG32(CP_RB0_RPTR);
4197 
4198 	return rptr;
4199 }
4200 
4201 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4202 		     struct radeon_ring *ring)
4203 {
4204 	u32 wptr;
4205 
4206 	wptr = RREG32(CP_RB0_WPTR);
4207 
4208 	return wptr;
4209 }
4210 
4211 void cik_gfx_set_wptr(struct radeon_device *rdev,
4212 		      struct radeon_ring *ring)
4213 {
4214 	WREG32(CP_RB0_WPTR, ring->wptr);
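	/* read back to flush the posted register write */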
4215 	(void)RREG32(CP_RB0_WPTR);
4216 }
4217 
4218 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4219 			 struct radeon_ring *ring)
4220 {
4221 	u32 rptr;
4222 
4223 	if (rdev->wb.enabled) {
4224 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4225 	} else {
4226 		mutex_lock(&rdev->srbm_mutex);
4227 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4228 		rptr = RREG32(CP_HQD_PQ_RPTR);
4229 		cik_srbm_select(rdev, 0, 0, 0, 0);
4230 		mutex_unlock(&rdev->srbm_mutex);
4231 	}
4232 
4233 	return rptr;
4234 }
4235 
4236 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4237 			 struct radeon_ring *ring)
4238 {
4239 	u32 wptr;
4240 
4241 	if (rdev->wb.enabled) {
4242 		/* XXX check if swapping is necessary on BE */
4243 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4244 	} else {
4245 		mutex_lock(&rdev->srbm_mutex);
4246 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4247 		wptr = RREG32(CP_HQD_PQ_WPTR);
4248 		cik_srbm_select(rdev, 0, 0, 0, 0);
4249 		mutex_unlock(&rdev->srbm_mutex);
4250 	}
4251 
4252 	return wptr;
4253 }
4254 
4255 void cik_compute_set_wptr(struct radeon_device *rdev,
4256 			  struct radeon_ring *ring)
4257 {
4258 	/* XXX check if swapping is necessary on BE */
4259 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
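	/* ring the doorbell so the CP picks up the new wptr */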
4260 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4261 }
4262 
4263 static void cik_compute_stop(struct radeon_device *rdev,
4264 			     struct radeon_ring *ring)
4265 {
4266 	u32 j, tmp;
4267 
4268 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4269 	/* Disable wptr polling. */
4270 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4271 	tmp &= ~WPTR_POLL_EN;
4272 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4273 	/* Disable HQD. */
4274 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4275 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4276 		for (j = 0; j < rdev->usec_timeout; j++) {
4277 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4278 				break;
4279 			udelay(1);
4280 		}
4281 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4282 		WREG32(CP_HQD_PQ_RPTR, 0);
4283 		WREG32(CP_HQD_PQ_WPTR, 0);
4284 	}
4285 	cik_srbm_select(rdev, 0, 0, 0, 0);
4286 }
4287 
4288 /**
4289  * cik_cp_compute_enable - enable/disable the compute CP MEs
4290  *
4291  * @rdev: radeon_device pointer
4292  * @enable: enable or disable the MEs
4293  *
4294  * Halts or unhalts the compute MEs.
4295  */
4296 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4297 {
4298 	if (enable)
4299 		WREG32(CP_MEC_CNTL, 0);
4300 	else {
4301 		/*
4302 		 * To make hibernation reliable we need to clear compute ring
4303 		 * configuration before halting the compute ring.
4304 		 */
4305 		mutex_lock(&rdev->srbm_mutex);
4306 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4307 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4308 		mutex_unlock(&rdev->srbm_mutex);
4309 
4310 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4311 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4312 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4313 	}
4314 	udelay(50);
4315 }
4316 
4317 /**
4318  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4319  *
4320  * @rdev: radeon_device pointer
4321  *
4322  * Loads the compute MEC1 and MEC2 ucode.
4323  * Returns 0 for success, -EINVAL if the ucode is not available.
4324  */
4325 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4326 {
4327 	int i;
4328 
4329 	if (!rdev->mec_fw)
4330 		return -EINVAL;
4331 
4332 	cik_cp_compute_enable(rdev, false);
4333 
4334 	if (rdev->new_fw) {
4335 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4336 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4337 		const __le32 *fw_data;
4338 		u32 fw_size;
4339 
4340 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4341 
4342 		/* MEC1 */
4343 		fw_data = (const __le32 *)
4344 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4345 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4346 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4347 		for (i = 0; i < fw_size; i++)
4348 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4349 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4350 
4351 		/* MEC2 */
4352 		if (rdev->family == CHIP_KAVERI) {
4353 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4354 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4355 
4356 			fw_data = (const __le32 *)
4357 				(rdev->mec2_fw->data +
4358 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4359 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4360 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4361 			for (i = 0; i < fw_size; i++)
4362 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4363 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4364 		}
4365 	} else {
4366 		const __be32 *fw_data;
4367 
4368 		/* MEC1 */
4369 		fw_data = (const __be32 *)rdev->mec_fw->data;
4370 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4371 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4372 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4373 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4374 
4375 		if (rdev->family == CHIP_KAVERI) {
4376 			/* MEC2 */
4377 			fw_data = (const __be32 *)rdev->mec_fw->data;
4378 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4379 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4380 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4381 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4382 		}
4383 	}
4384 
4385 	return 0;
4386 }
4387 
4388 /**
4389  * cik_cp_compute_start - start the compute queues
4390  *
4391  * @rdev: radeon_device pointer
4392  *
4393  * Enable the compute queues.
4394  * Returns 0 for success, error for failure.
4395  */
4396 static int cik_cp_compute_start(struct radeon_device *rdev)
4397 {
4398 	cik_cp_compute_enable(rdev, true);
4399 
4400 	return 0;
4401 }
4402 
4403 /**
4404  * cik_cp_compute_fini - stop the compute queues
4405  *
4406  * @rdev: radeon_device pointer
4407  *
4408  * Stop the compute queues and tear down the driver queue
4409  * info.
4410  */
4411 static void cik_cp_compute_fini(struct radeon_device *rdev)
4412 {
4413 	int i, idx, r;
4414 
4415 	cik_cp_compute_enable(rdev, false);
4416 
4417 	for (i = 0; i < 2; i++) {
4418 		if (i == 0)
4419 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4420 		else
4421 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4422 
4423 		if (rdev->ring[idx].mqd_obj) {
4424 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4425 			if (unlikely(r != 0))
4426 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4427 
4428 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4429 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4430 
4431 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4432 			rdev->ring[idx].mqd_obj = NULL;
4433 		}
4434 	}
4435 }
4436 
4437 static void cik_mec_fini(struct radeon_device *rdev)
4438 {
4439 	int r;
4440 
4441 	if (rdev->mec.hpd_eop_obj) {
4442 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4443 		if (unlikely(r != 0))
4444 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4445 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4446 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4447 
4448 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4449 		rdev->mec.hpd_eop_obj = NULL;
4450 	}
4451 }
4452 
4453 #define MEC_HPD_SIZE 2048
4454 
4455 static int cik_mec_init(struct radeon_device *rdev)
4456 {
4457 	int r;
4458 	u32 *hpd;
4459 
4460 	/*
4461 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4462 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4463 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4464 	 * be handled by KFD
4465 	 */
4466 	rdev->mec.num_mec = 1;
4467 	rdev->mec.num_pipe = 1;
4468 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4469 
4470 	if (rdev->mec.hpd_eop_obj == NULL) {
4471 		r = radeon_bo_create(rdev,
4472 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4473 				     PAGE_SIZE, true,
4474 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4475 				     &rdev->mec.hpd_eop_obj);
4476 		if (r) {
4477 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4478 			return r;
4479 		}
4480 	}
4481 
4482 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4483 	if (unlikely(r != 0)) {
4484 		cik_mec_fini(rdev);
4485 		return r;
4486 	}
4487 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4488 			  &rdev->mec.hpd_eop_gpu_addr);
4489 	if (r) {
4490 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4491 		cik_mec_fini(rdev);
4492 		return r;
4493 	}
4494 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4495 	if (r) {
4496 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4497 		cik_mec_fini(rdev);
4498 		return r;
4499 	}
4500 
4501 	/* clear memory.  Not sure if this is required or not */
4502 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4503 
4504 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4505 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4506 
4507 	return 0;
4508 }
4509 
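/* snapshot of the per-queue CP_MQD_ and CP_HQD_ registers; kept inside the
 * MQD so a queue's hardware state can be saved and restored
 */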
4510 struct hqd_registers
4511 {
4512 	u32 cp_mqd_base_addr;
4513 	u32 cp_mqd_base_addr_hi;
4514 	u32 cp_hqd_active;
4515 	u32 cp_hqd_vmid;
4516 	u32 cp_hqd_persistent_state;
4517 	u32 cp_hqd_pipe_priority;
4518 	u32 cp_hqd_queue_priority;
4519 	u32 cp_hqd_quantum;
4520 	u32 cp_hqd_pq_base;
4521 	u32 cp_hqd_pq_base_hi;
4522 	u32 cp_hqd_pq_rptr;
4523 	u32 cp_hqd_pq_rptr_report_addr;
4524 	u32 cp_hqd_pq_rptr_report_addr_hi;
4525 	u32 cp_hqd_pq_wptr_poll_addr;
4526 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4527 	u32 cp_hqd_pq_doorbell_control;
4528 	u32 cp_hqd_pq_wptr;
4529 	u32 cp_hqd_pq_control;
4530 	u32 cp_hqd_ib_base_addr;
4531 	u32 cp_hqd_ib_base_addr_hi;
4532 	u32 cp_hqd_ib_rptr;
4533 	u32 cp_hqd_ib_control;
4534 	u32 cp_hqd_iq_timer;
4535 	u32 cp_hqd_iq_rptr;
4536 	u32 cp_hqd_dequeue_request;
4537 	u32 cp_hqd_dma_offload;
4538 	u32 cp_hqd_sema_cmd;
4539 	u32 cp_hqd_msg_type;
4540 	u32 cp_hqd_atomic0_preop_lo;
4541 	u32 cp_hqd_atomic0_preop_hi;
4542 	u32 cp_hqd_atomic1_preop_lo;
4543 	u32 cp_hqd_atomic1_preop_hi;
4544 	u32 cp_hqd_hq_scheduler0;
4545 	u32 cp_hqd_hq_scheduler1;
4546 	u32 cp_mqd_control;
4547 };
4548 
4549 struct bonaire_mqd
4550 {
4551 	u32 header;
4552 	u32 dispatch_initiator;
4553 	u32 dimensions[3];
4554 	u32 start_idx[3];
4555 	u32 num_threads[3];
4556 	u32 pipeline_stat_enable;
4557 	u32 perf_counter_enable;
4558 	u32 pgm[2];
4559 	u32 tba[2];
4560 	u32 tma[2];
4561 	u32 pgm_rsrc[2];
4562 	u32 vmid;
4563 	u32 resource_limits;
4564 	u32 static_thread_mgmt01[2];
4565 	u32 tmp_ring_size;
4566 	u32 static_thread_mgmt23[2];
4567 	u32 restart[3];
4568 	u32 thread_trace_enable;
4569 	u32 reserved1;
4570 	u32 user_data[16];
4571 	u32 vgtcs_invoke_count[2];
4572 	struct hqd_registers queue_state;
4573 	u32 dequeue_cntr;
4574 	u32 interrupt_queue[64];
4575 };
4576 
4577 /**
4578  * cik_cp_compute_resume - setup the compute queue registers
4579  *
4580  * @rdev: radeon_device pointer
4581  *
4582  * Program the compute queues and test them to make sure they
4583  * are working.
4584  * Returns 0 for success, error for failure.
4585  */
4586 static int cik_cp_compute_resume(struct radeon_device *rdev)
4587 {
4588 	int r, i, j, idx;
4589 	u32 tmp;
4590 	bool use_doorbell = true;
4591 	u64 hqd_gpu_addr;
4592 	u64 mqd_gpu_addr;
4593 	u64 eop_gpu_addr;
4594 	u64 wb_gpu_addr;
4595 	u32 *buf;
4596 	struct bonaire_mqd *mqd;
4597 
4598 	r = cik_cp_compute_start(rdev);
4599 	if (r)
4600 		return r;
4601 
4602 	/* fix up chicken bits */
4603 	tmp = RREG32(CP_CPF_DEBUG);
4604 	tmp |= (1 << 23);
4605 	WREG32(CP_CPF_DEBUG, tmp);
4606 
4607 	/* init the pipes */
4608 	mutex_lock(&rdev->srbm_mutex);
4609 
4610 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4611 
4612 	cik_srbm_select(rdev, 0, 0, 0, 0);
4613 
4614 	/* write the EOP addr */
4615 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4616 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4617 
4618 	/* set the VMID assigned */
4619 	WREG32(CP_HPD_EOP_VMID, 0);
4620 
4621 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4622 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4623 	tmp &= ~EOP_SIZE_MASK;
4624 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4625 	WREG32(CP_HPD_EOP_CONTROL, tmp);
4626 
4627 	mutex_unlock(&rdev->srbm_mutex);
4628 
4629 	/* init the queues.  Just two for now. */
4630 	for (i = 0; i < 2; i++) {
4631 		if (i == 0)
4632 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4633 		else
4634 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4635 
4636 		if (rdev->ring[idx].mqd_obj == NULL) {
4637 			r = radeon_bo_create(rdev,
4638 					     sizeof(struct bonaire_mqd),
4639 					     PAGE_SIZE, true,
4640 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4641 					     NULL, &rdev->ring[idx].mqd_obj);
4642 			if (r) {
4643 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4644 				return r;
4645 			}
4646 		}
4647 
4648 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4649 		if (unlikely(r != 0)) {
4650 			cik_cp_compute_fini(rdev);
4651 			return r;
4652 		}
4653 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4654 				  &mqd_gpu_addr);
4655 		if (r) {
4656 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4657 			cik_cp_compute_fini(rdev);
4658 			return r;
4659 		}
4660 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4661 		if (r) {
4662 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4663 			cik_cp_compute_fini(rdev);
4664 			return r;
4665 		}
4666 
4667 		/* init the mqd struct */
4668 		memset(buf, 0, sizeof(struct bonaire_mqd));
4669 
4670 		mqd = (struct bonaire_mqd *)buf;
4671 		mqd->header = 0xC0310800;
4672 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4673 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4674 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4675 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4676 
4677 		mutex_lock(&rdev->srbm_mutex);
4678 		cik_srbm_select(rdev, rdev->ring[idx].me,
4679 				rdev->ring[idx].pipe,
4680 				rdev->ring[idx].queue, 0);
4681 
4682 		/* disable wptr polling */
4683 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4684 		tmp &= ~WPTR_POLL_EN;
4685 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4686 
4687 		/* enable doorbell? */
4688 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4689 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4690 		if (use_doorbell)
4691 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4692 		else
4693 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4694 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4695 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4696 
4697 		/* disable the queue if it's active */
4698 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4699 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4700 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4701 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4702 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4703 			for (j = 0; j < rdev->usec_timeout; j++) {
4704 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4705 					break;
4706 				udelay(1);
4707 			}
4708 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4709 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4710 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4711 		}
4712 
4713 		/* set the pointer to the MQD */
4714 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4715 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4716 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4717 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4718 		/* set MQD vmid to 0 */
4719 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4720 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4721 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4722 
4723 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4724 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4725 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4726 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4727 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4728 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4729 
4730 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4731 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4732 		mqd->queue_state.cp_hqd_pq_control &=
4733 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4734 
4735 		mqd->queue_state.cp_hqd_pq_control |=
4736 			order_base_2(rdev->ring[idx].ring_size / 8);
4737 		mqd->queue_state.cp_hqd_pq_control |=
4738 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4739 #ifdef __BIG_ENDIAN
4740 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4741 #endif
4742 		mqd->queue_state.cp_hqd_pq_control &=
4743 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4744 		mqd->queue_state.cp_hqd_pq_control |=
4745 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4746 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4747 
4748 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4749 		if (i == 0)
4750 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4751 		else
4752 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4753 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4754 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4755 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4756 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4757 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4758 
4759 		/* set the wb address whether it's enabled or not */
4760 		if (i == 0)
4761 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4762 		else
4763 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4764 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4765 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4766 			upper_32_bits(wb_gpu_addr) & 0xffff;
4767 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4768 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4769 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4770 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4771 
4772 		/* enable the doorbell if requested */
4773 		if (use_doorbell) {
4774 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4775 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4776 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4777 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4778 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4779 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4780 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4781 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4782 
4783 		} else {
4784 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4785 		}
4786 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4787 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4788 
4789 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4790 		rdev->ring[idx].wptr = 0;
4791 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4792 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4793 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4794 
4795 		/* set the vmid for the queue */
4796 		mqd->queue_state.cp_hqd_vmid = 0;
4797 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4798 
4799 		/* activate the queue */
4800 		mqd->queue_state.cp_hqd_active = 1;
4801 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4802 
4803 		cik_srbm_select(rdev, 0, 0, 0, 0);
4804 		mutex_unlock(&rdev->srbm_mutex);
4805 
4806 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4807 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4808 
4809 		rdev->ring[idx].ready = true;
4810 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4811 		if (r)
4812 			rdev->ring[idx].ready = false;
4813 	}
4814 
4815 	return 0;
4816 }
4817 
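/**
 * cik_cp_enable - enable/disable the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Enable or disable both the gfx and compute command
 * processors (CIK).
 */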
4818 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4819 {
4820 	cik_cp_gfx_enable(rdev, enable);
4821 	cik_cp_compute_enable(rdev, enable);
4822 }
4823 
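/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Load the gfx (PFP/ME/CE) and compute (MEC) microcode (CIK).
 * Returns 0 for success, error for failure.
 */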
4824 static int cik_cp_load_microcode(struct radeon_device *rdev)
4825 {
4826 	int r;
4827 
4828 	r = cik_cp_gfx_load_microcode(rdev);
4829 	if (r)
4830 		return r;
4831 	r = cik_cp_compute_load_microcode(rdev);
4832 	if (r)
4833 		return r;
4834 
4835 	return 0;
4836 }
4837 
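/**
 * cik_cp_fini - tear down the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the gfx and compute command processor rings (CIK).
 */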
4838 static void cik_cp_fini(struct radeon_device *rdev)
4839 {
4840 	cik_cp_gfx_fini(rdev);
4841 	cik_cp_compute_fini(rdev);
4842 }
4843 
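/**
 * cik_cp_resume - start the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Load the CP microcode and start the gfx and compute rings (CIK).
 * Returns 0 for success, error for failure.
 */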
4844 static int cik_cp_resume(struct radeon_device *rdev)
4845 {
4846 	int r;
4847 
4848 	cik_enable_gui_idle_interrupt(rdev, false);
4849 
4850 	r = cik_cp_load_microcode(rdev);
4851 	if (r)
4852 		return r;
4853 
4854 	r = cik_cp_gfx_resume(rdev);
4855 	if (r)
4856 		return r;
4857 	r = cik_cp_compute_resume(rdev);
4858 	if (r)
4859 		return r;
4860 
4861 	cik_enable_gui_idle_interrupt(rdev, true);
4862 
4863 	return 0;
4864 }
4865 
4866 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4867 {
4868 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4869 		RREG32(GRBM_STATUS));
4870 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4871 		RREG32(GRBM_STATUS2));
4872 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4873 		RREG32(GRBM_STATUS_SE0));
4874 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4875 		RREG32(GRBM_STATUS_SE1));
4876 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4877 		RREG32(GRBM_STATUS_SE2));
4878 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4879 		RREG32(GRBM_STATUS_SE3));
4880 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4881 		RREG32(SRBM_STATUS));
4882 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4883 		RREG32(SRBM_STATUS2));
4884 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4885 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4886 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4887 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4888 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4889 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4890 		 RREG32(CP_STALLED_STAT1));
4891 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4892 		 RREG32(CP_STALLED_STAT2));
4893 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4894 		 RREG32(CP_STALLED_STAT3));
4895 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4896 		 RREG32(CP_CPF_BUSY_STAT));
4897 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4898 		 RREG32(CP_CPF_STALLED_STAT1));
4899 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4900 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4901 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4902 		 RREG32(CP_CPC_STALLED_STAT1));
4903 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4904 }
4905 
4906 /**
4907  * cik_gpu_check_soft_reset - check which blocks are busy
4908  *
4909  * @rdev: radeon_device pointer
4910  *
4911  * Check which blocks are busy and return the relevant reset
4912  * mask to be used by cik_gpu_soft_reset().
4913  * Returns a mask of the blocks to be reset.
4914  */
4915 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4916 {
4917 	u32 reset_mask = 0;
4918 	u32 tmp;
4919 
4920 	/* GRBM_STATUS */
4921 	tmp = RREG32(GRBM_STATUS);
4922 	if (tmp & (PA_BUSY | SC_BUSY |
4923 		   BCI_BUSY | SX_BUSY |
4924 		   TA_BUSY | VGT_BUSY |
4925 		   DB_BUSY | CB_BUSY |
4926 		   GDS_BUSY | SPI_BUSY |
4927 		   IA_BUSY | IA_BUSY_NO_DMA))
4928 		reset_mask |= RADEON_RESET_GFX;
4929 
4930 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4931 		reset_mask |= RADEON_RESET_CP;
4932 
4933 	/* GRBM_STATUS2 */
4934 	tmp = RREG32(GRBM_STATUS2);
4935 	if (tmp & RLC_BUSY)
4936 		reset_mask |= RADEON_RESET_RLC;
4937 
4938 	/* SDMA0_STATUS_REG */
4939 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4940 	if (!(tmp & SDMA_IDLE))
4941 		reset_mask |= RADEON_RESET_DMA;
4942 
4943 	/* SDMA1_STATUS_REG */
4944 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4945 	if (!(tmp & SDMA_IDLE))
4946 		reset_mask |= RADEON_RESET_DMA1;
4947 
4948 	/* SRBM_STATUS2 */
4949 	tmp = RREG32(SRBM_STATUS2);
4950 	if (tmp & SDMA_BUSY)
4951 		reset_mask |= RADEON_RESET_DMA;
4952 
4953 	if (tmp & SDMA1_BUSY)
4954 		reset_mask |= RADEON_RESET_DMA1;
4955 
4956 	/* SRBM_STATUS */
4957 	tmp = RREG32(SRBM_STATUS);
4958 
4959 	if (tmp & IH_BUSY)
4960 		reset_mask |= RADEON_RESET_IH;
4961 
4962 	if (tmp & SEM_BUSY)
4963 		reset_mask |= RADEON_RESET_SEM;
4964 
4965 	if (tmp & GRBM_RQ_PENDING)
4966 		reset_mask |= RADEON_RESET_GRBM;
4967 
4968 	if (tmp & VMC_BUSY)
4969 		reset_mask |= RADEON_RESET_VMC;
4970 
4971 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4972 		   MCC_BUSY | MCD_BUSY))
4973 		reset_mask |= RADEON_RESET_MC;
4974 
4975 	if (evergreen_is_display_hung(rdev))
4976 		reset_mask |= RADEON_RESET_DISPLAY;
4977 
4978 	/* Skip MC reset as it's most likely not hung, just busy */
4979 	if (reset_mask & RADEON_RESET_MC) {
4980 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4981 		reset_mask &= ~RADEON_RESET_MC;
4982 	}
4983 
4984 	return reset_mask;
4985 }
4986 
4987 /**
4988  * cik_gpu_soft_reset - soft reset GPU
4989  *
4990  * @rdev: radeon_device pointer
4991  * @reset_mask: mask of which blocks to reset
4992  *
4993  * Soft reset the blocks specified in @reset_mask.
4994  */
4995 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4996 {
4997 	struct evergreen_mc_save save;
4998 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4999 	u32 tmp;
5000 
5001 	if (reset_mask == 0)
5002 		return;
5003 
5004 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5005 
5006 	cik_print_gpu_status_regs(rdev);
5007 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5008 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5009 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5010 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5011 
5012 	/* disable CG/PG */
5013 	cik_fini_pg(rdev);
5014 	cik_fini_cg(rdev);
5015 
5016 	/* stop the rlc */
5017 	cik_rlc_stop(rdev);
5018 
5019 	/* Disable GFX parsing/prefetching */
5020 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5021 
5022 	/* Disable MEC parsing/prefetching */
5023 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5024 
5025 	if (reset_mask & RADEON_RESET_DMA) {
5026 		/* sdma0 */
5027 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5028 		tmp |= SDMA_HALT;
5029 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5030 	}
5031 	if (reset_mask & RADEON_RESET_DMA1) {
5032 		/* sdma1 */
5033 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5034 		tmp |= SDMA_HALT;
5035 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5036 	}
5037 
5038 	evergreen_mc_stop(rdev, &save);
5039 	if (evergreen_mc_wait_for_idle(rdev)) {
5040 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5041 	}
5042 
5043 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5044 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5045 
5046 	if (reset_mask & RADEON_RESET_CP) {
5047 		grbm_soft_reset |= SOFT_RESET_CP;
5048 
5049 		srbm_soft_reset |= SOFT_RESET_GRBM;
5050 	}
5051 
5052 	if (reset_mask & RADEON_RESET_DMA)
5053 		srbm_soft_reset |= SOFT_RESET_SDMA;
5054 
5055 	if (reset_mask & RADEON_RESET_DMA1)
5056 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5057 
5058 	if (reset_mask & RADEON_RESET_DISPLAY)
5059 		srbm_soft_reset |= SOFT_RESET_DC;
5060 
5061 	if (reset_mask & RADEON_RESET_RLC)
5062 		grbm_soft_reset |= SOFT_RESET_RLC;
5063 
5064 	if (reset_mask & RADEON_RESET_SEM)
5065 		srbm_soft_reset |= SOFT_RESET_SEM;
5066 
5067 	if (reset_mask & RADEON_RESET_IH)
5068 		srbm_soft_reset |= SOFT_RESET_IH;
5069 
5070 	if (reset_mask & RADEON_RESET_GRBM)
5071 		srbm_soft_reset |= SOFT_RESET_GRBM;
5072 
5073 	if (reset_mask & RADEON_RESET_VMC)
5074 		srbm_soft_reset |= SOFT_RESET_VMC;
5075 
5076 	if (!(rdev->flags & RADEON_IS_IGP)) {
5077 		if (reset_mask & RADEON_RESET_MC)
5078 			srbm_soft_reset |= SOFT_RESET_MC;
5079 	}
5080 
5081 	if (grbm_soft_reset) {
5082 		tmp = RREG32(GRBM_SOFT_RESET);
5083 		tmp |= grbm_soft_reset;
5084 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5085 		WREG32(GRBM_SOFT_RESET, tmp);
5086 		tmp = RREG32(GRBM_SOFT_RESET);
5087 
5088 		udelay(50);
5089 
5090 		tmp &= ~grbm_soft_reset;
5091 		WREG32(GRBM_SOFT_RESET, tmp);
5092 		tmp = RREG32(GRBM_SOFT_RESET);
5093 	}
5094 
5095 	if (srbm_soft_reset) {
5096 		tmp = RREG32(SRBM_SOFT_RESET);
5097 		tmp |= srbm_soft_reset;
5098 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5099 		WREG32(SRBM_SOFT_RESET, tmp);
5100 		tmp = RREG32(SRBM_SOFT_RESET);
5101 
5102 		udelay(50);
5103 
5104 		tmp &= ~srbm_soft_reset;
5105 		WREG32(SRBM_SOFT_RESET, tmp);
5106 		tmp = RREG32(SRBM_SOFT_RESET);
5107 	}
5108 
5109 	/* Wait a little for things to settle down */
5110 	udelay(50);
5111 
5112 	evergreen_mc_resume(rdev, &save);
5113 	udelay(50);
5114 
5115 	cik_print_gpu_status_regs(rdev);
5116 }
5117 
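/* GMCON state that must be preserved across a pci config reset
 * on KV/KB class APUs.
 */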
5118 struct kv_reset_save_regs {
5119 	u32 gmcon_reng_execute;
5120 	u32 gmcon_misc;
5121 	u32 gmcon_misc3;
5122 };
5123 
5124 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5125 				   struct kv_reset_save_regs *save)
5126 {
5127 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5128 	save->gmcon_misc = RREG32(GMCON_MISC);
5129 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5130 
5131 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5132 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5133 						STCTRL_STUTTER_EN));
5134 }
5135 
5136 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5137 				      struct kv_reset_save_regs *save)
5138 {
5139 	int i;
5140 
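	/* Replay the GMCON PGFSM restore sequence.  The config/write
	 * value pairs below are an opaque, hardware-specific sequence;
	 * do not modify them.
	 */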
5141 	WREG32(GMCON_PGFSM_WRITE, 0);
5142 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5143 
5144 	for (i = 0; i < 5; i++)
5145 		WREG32(GMCON_PGFSM_WRITE, 0);
5146 
5147 	WREG32(GMCON_PGFSM_WRITE, 0);
5148 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5149 
5150 	for (i = 0; i < 5; i++)
5151 		WREG32(GMCON_PGFSM_WRITE, 0);
5152 
5153 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5154 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5155 
5156 	for (i = 0; i < 5; i++)
5157 		WREG32(GMCON_PGFSM_WRITE, 0);
5158 
5159 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5160 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5161 
5162 	for (i = 0; i < 5; i++)
5163 		WREG32(GMCON_PGFSM_WRITE, 0);
5164 
5165 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5166 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5167 
5168 	for (i = 0; i < 5; i++)
5169 		WREG32(GMCON_PGFSM_WRITE, 0);
5170 
5171 	WREG32(GMCON_PGFSM_WRITE, 0);
5172 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5173 
5174 	for (i = 0; i < 5; i++)
5175 		WREG32(GMCON_PGFSM_WRITE, 0);
5176 
5177 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5178 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5179 
5180 	for (i = 0; i < 5; i++)
5181 		WREG32(GMCON_PGFSM_WRITE, 0);
5182 
5183 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5184 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5185 
5186 	for (i = 0; i < 5; i++)
5187 		WREG32(GMCON_PGFSM_WRITE, 0);
5188 
5189 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5190 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5191 
5192 	for (i = 0; i < 5; i++)
5193 		WREG32(GMCON_PGFSM_WRITE, 0);
5194 
5195 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5196 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5197 
5198 	for (i = 0; i < 5; i++)
5199 		WREG32(GMCON_PGFSM_WRITE, 0);
5200 
5201 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5202 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5203 
5204 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5205 	WREG32(GMCON_MISC, save->gmcon_misc);
5206 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5207 }
5208 
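/**
 * cik_gpu_pci_config_reset - reset the asic via pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce the asic (disable CG/PG, halt the CP, MEC, SDMA and RLC
 * engines, stop memory access), then reset the whole GPU through
 * its pci config space (CIK).  On APUs the GMCON registers are
 * saved and restored around the reset.
 */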
5209 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5210 {
5211 	struct evergreen_mc_save save;
5212 	struct kv_reset_save_regs kv_save = { 0 };
5213 	u32 tmp, i;
5214 
5215 	dev_info(rdev->dev, "GPU pci config reset\n");
5216 
5217 	/* disable dpm? */
5218 
5219 	/* disable cg/pg */
5220 	cik_fini_pg(rdev);
5221 	cik_fini_cg(rdev);
5222 
5223 	/* Disable GFX parsing/prefetching */
5224 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5225 
5226 	/* Disable MEC parsing/prefetching */
5227 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5228 
5229 	/* sdma0 */
5230 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5231 	tmp |= SDMA_HALT;
5232 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5233 	/* sdma1 */
5234 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5235 	tmp |= SDMA_HALT;
5236 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5237 	/* XXX other engines? */
5238 
5239 	/* halt the rlc, disable cp internal ints */
5240 	cik_rlc_stop(rdev);
5241 
5242 	udelay(50);
5243 
5244 	/* disable mem access */
5245 	evergreen_mc_stop(rdev, &save);
5246 	if (evergreen_mc_wait_for_idle(rdev)) {
5247 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5248 	}
5249 
5250 	if (rdev->flags & RADEON_IS_IGP)
5251 		kv_save_regs_for_reset(rdev, &kv_save);
5252 
5253 	/* disable BM */
5254 	pci_clear_master(rdev->pdev);
5255 	/* reset */
5256 	radeon_pci_config_reset(rdev);
5257 
5258 	udelay(100);
5259 
5260 	/* wait for asic to come out of reset */
5261 	for (i = 0; i < rdev->usec_timeout; i++) {
5262 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5263 			break;
5264 		udelay(1);
5265 	}
5266 
5267 	/* does asic init need to be run first??? */
5268 	if (rdev->flags & RADEON_IS_IGP)
5269 		kv_restore_regs_for_reset(rdev, &kv_save);
5270 }
5271 
5272 /**
5273  * cik_asic_reset - soft reset GPU
5274  *
5275  * @rdev: radeon_device pointer
5276  * @hard: force hard reset
5277  *
5278  * Look up which blocks are hung and attempt
5279  * to reset them.
5280  * Returns 0 for success.
5281  */
5282 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5283 {
5284 	u32 reset_mask;
5285 
5286 	if (hard) {
5287 		cik_gpu_pci_config_reset(rdev);
5288 		return 0;
5289 	}
5290 
5291 	reset_mask = cik_gpu_check_soft_reset(rdev);
5292 
5293 	if (reset_mask)
5294 		r600_set_bios_scratch_engine_hung(rdev, true);
5295 
5296 	/* try soft reset */
5297 	cik_gpu_soft_reset(rdev, reset_mask);
5298 
5299 	reset_mask = cik_gpu_check_soft_reset(rdev);
5300 
5301 	/* try pci config reset */
5302 	if (reset_mask && radeon_hard_reset)
5303 		cik_gpu_pci_config_reset(rdev);
5304 
5305 	reset_mask = cik_gpu_check_soft_reset(rdev);
5306 
5307 	if (!reset_mask)
5308 		r600_set_bios_scratch_engine_hung(rdev, false);
5309 
5310 	return 0;
5311 }
5312 
5313 /**
5314  * cik_gfx_is_lockup - check if the 3D engine is locked up
5315  *
5316  * @rdev: radeon_device pointer
5317  * @ring: radeon_ring structure holding ring information
5318  *
5319  * Check if the 3D engine is locked up (CIK).
5320  * Returns true if the engine is locked, false if not.
5321  */
5322 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5323 {
5324 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5325 
5326 	if (!(reset_mask & (RADEON_RESET_GFX |
5327 			    RADEON_RESET_COMPUTE |
5328 			    RADEON_RESET_CP))) {
5329 		radeon_ring_lockup_update(rdev, ring);
5330 		return false;
5331 	}
5332 	return radeon_ring_test_lockup(rdev, ring);
5333 }
5334 
5335 /* MC */
5336 /**
5337  * cik_mc_program - program the GPU memory controller
5338  *
5339  * @rdev: radeon_device pointer
5340  *
5341  * Set the location of vram, gart, and AGP in the GPU's
5342  * physical address space (CIK).
5343  */
5344 static void cik_mc_program(struct radeon_device *rdev)
5345 {
5346 	struct evergreen_mc_save save;
5347 	u32 tmp;
5348 	int i, j;
5349 
5350 	/* Initialize HDP */
5351 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5352 		WREG32((0x2c14 + j), 0x00000000);
5353 		WREG32((0x2c18 + j), 0x00000000);
5354 		WREG32((0x2c1c + j), 0x00000000);
5355 		WREG32((0x2c20 + j), 0x00000000);
5356 		WREG32((0x2c24 + j), 0x00000000);
5357 	}
5358 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5359 
5360 	evergreen_mc_stop(rdev, &save);
5361 	if (radeon_mc_wait_for_idle(rdev)) {
5362 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5363 	}
5364 	/* Lockout access through VGA aperture */
5365 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5366 	/* Update configuration */
5367 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5368 	       rdev->mc.vram_start >> 12);
5369 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5370 	       rdev->mc.vram_end >> 12);
5371 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5372 	       rdev->vram_scratch.gpu_addr >> 12);
5373 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5374 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5375 	WREG32(MC_VM_FB_LOCATION, tmp);
5376 	/* XXX double check these! */
5377 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5378 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5379 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5380 	WREG32(MC_VM_AGP_BASE, 0);
5381 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5382 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5383 	if (radeon_mc_wait_for_idle(rdev)) {
5384 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5385 	}
5386 	evergreen_mc_resume(rdev, &save);
5387 	/* we need to own VRAM, so turn off the VGA renderer here
5388 	 * to stop it overwriting our objects */
5389 	rv515_vga_render_disable(rdev);
5390 }
5391 
5392 /**
5393  * cik_mc_init - initialize the memory controller driver params
5394  *
5395  * @rdev: radeon_device pointer
5396  *
5397  * Look up the amount of vram, vram width, and decide how to place
5398  * vram and gart within the GPU's physical address space (CIK).
5399  * Returns 0 for success.
5400  */
5401 static int cik_mc_init(struct radeon_device *rdev)
5402 {
5403 	u32 tmp;
5404 	int chansize, numchan;
5405 
5406 	/* Get VRAM information */
5407 	rdev->mc.vram_is_ddr = true;
5408 	tmp = RREG32(MC_ARB_RAMCFG);
5409 	if (tmp & CHANSIZE_MASK) {
5410 		chansize = 64;
5411 	} else {
5412 		chansize = 32;
5413 	}
5414 	tmp = RREG32(MC_SHARED_CHMAP);
5415 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5416 	case 0:
5417 	default:
5418 		numchan = 1;
5419 		break;
5420 	case 1:
5421 		numchan = 2;
5422 		break;
5423 	case 2:
5424 		numchan = 4;
5425 		break;
5426 	case 3:
5427 		numchan = 8;
5428 		break;
5429 	case 4:
5430 		numchan = 3;
5431 		break;
5432 	case 5:
5433 		numchan = 6;
5434 		break;
5435 	case 6:
5436 		numchan = 10;
5437 		break;
5438 	case 7:
5439 		numchan = 12;
5440 		break;
5441 	case 8:
5442 		numchan = 16;
5443 		break;
5444 	}
5445 	rdev->mc.vram_width = numchan * chansize;
5446 	/* Could aper size report 0? */
5447 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5448 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5449 	/* size in MB, as on SI */
5450 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5451 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5452 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5453 	si_vram_gtt_location(rdev, &rdev->mc);
5454 	radeon_update_bandwidth_info(rdev);
5455 
5456 	return 0;
5457 }
5458 
5459 /*
5460  * GART
5461  * VMID 0 is the physical GPU addresses as used by the kernel.
5462  * VMIDs 1-15 are used for userspace clients and are handled
5463  * by the radeon vm/hsa code.
5464  */
5465 /**
5466  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5467  *
5468  * @rdev: radeon_device pointer
5469  *
5470  * Flush the TLB for the VMID 0 page table (CIK).
5471  */
5472 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5473 {
5474 	/* flush hdp cache */
5475 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5476 
5477 	/* bits 0-15 are the VM contexts 0-15 */
5478 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5479 }
5480 
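/**
 * cik_pcie_init_compute_vmid - set up the compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Program the SH_MEM_* apertures for VMIDs 8-15, which are
 * reserved for the amdkfd compute clients (CIK).
 */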
5481 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5482 {
5483 	int i;
5484 	uint32_t sh_mem_bases, sh_mem_config;
5485 
5486 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5487 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5488 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5489 
5490 	mutex_lock(&rdev->srbm_mutex);
5491 	for (i = 8; i < 16; i++) {
5492 		cik_srbm_select(rdev, 0, 0, 0, i);
5493 		/* CP and shaders */
5494 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5495 		WREG32(SH_MEM_APE1_BASE, 1);
5496 		WREG32(SH_MEM_APE1_LIMIT, 0);
5497 		WREG32(SH_MEM_BASES, sh_mem_bases);
5498 	}
5499 	cik_srbm_select(rdev, 0, 0, 0, 0);
5500 	mutex_unlock(&rdev->srbm_mutex);
5501 }
5502 
5503 /**
5504  * cik_pcie_gart_enable - gart enable
5505  *
5506  * @rdev: radeon_device pointer
5507  *
5508  * This sets up the TLBs, programs the page tables for VMID0,
5509  * sets up the hw for VMIDs 1-15 which are allocated on
5510  * demand, and sets up the global locations for the LDS, GDS,
5511  * and GPUVM for FSA64 clients (CIK).
5512  * Returns 0 for success, errors for failure.
5513  */
5514 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5515 {
5516 	int r, i;
5517 
5518 	if (rdev->gart.robj == NULL) {
5519 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5520 		return -EINVAL;
5521 	}
5522 	r = radeon_gart_table_vram_pin(rdev);
5523 	if (r)
5524 		return r;
5525 	/* Setup TLB control */
5526 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5527 	       (0xA << 7) |
5528 	       ENABLE_L1_TLB |
5529 	       ENABLE_L1_FRAGMENT_PROCESSING |
5530 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5531 	       ENABLE_ADVANCED_DRIVER_MODEL |
5532 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5533 	/* Setup L2 cache */
5534 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5535 	       ENABLE_L2_FRAGMENT_PROCESSING |
5536 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5537 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5538 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5539 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5540 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5541 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5542 	       BANK_SELECT(4) |
5543 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5544 	/* setup context0 */
5545 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5546 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5547 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5548 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5549 			(u32)(rdev->dummy_page.addr >> 12));
5550 	WREG32(VM_CONTEXT0_CNTL2, 0);
5551 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5552 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5553 
5554 	WREG32(0x15D4, 0);
5555 	WREG32(0x15D8, 0);
5556 	WREG32(0x15DC, 0);
5557 
5558 	/* restore contexts 1-15 */
5559 	/* set vm size, must be a multiple of 4 */
5560 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5561 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5562 	for (i = 1; i < 16; i++) {
5563 		if (i < 8)
5564 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5565 			       rdev->vm_manager.saved_table_addr[i]);
5566 		else
5567 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5568 			       rdev->vm_manager.saved_table_addr[i]);
5569 	}
5570 
5571 	/* enable contexts 1-15 */
5572 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5573 	       (u32)(rdev->dummy_page.addr >> 12));
5574 	WREG32(VM_CONTEXT1_CNTL2, 4);
5575 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5576 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5577 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5578 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5579 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5580 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5581 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5582 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5583 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5584 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5585 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5586 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5587 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5588 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5589 
5590 	if (rdev->family == CHIP_KAVERI) {
5591 		u32 tmp = RREG32(CHUB_CONTROL);
5592 		tmp &= ~BYPASS_VM;
5593 		WREG32(CHUB_CONTROL, tmp);
5594 	}
5595 
5596 	/* XXX SH_MEM regs */
5597 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5598 	mutex_lock(&rdev->srbm_mutex);
5599 	for (i = 0; i < 16; i++) {
5600 		cik_srbm_select(rdev, 0, 0, 0, i);
5601 		/* CP and shaders */
5602 		WREG32(SH_MEM_CONFIG, 0);
5603 		WREG32(SH_MEM_APE1_BASE, 1);
5604 		WREG32(SH_MEM_APE1_LIMIT, 0);
5605 		WREG32(SH_MEM_BASES, 0);
5606 		/* SDMA GFX */
5607 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5608 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5609 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5610 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5611 		/* XXX SDMA RLC - todo */
5612 	}
5613 	cik_srbm_select(rdev, 0, 0, 0, 0);
5614 	mutex_unlock(&rdev->srbm_mutex);
5615 
5616 	cik_pcie_init_compute_vmid(rdev);
5617 
5618 	cik_pcie_gart_tlb_flush(rdev);
5619 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5620 		 (unsigned)(rdev->mc.gtt_size >> 20),
5621 		 (unsigned long long)rdev->gart.table_addr);
5622 	rdev->gart.ready = true;
5623 	return 0;
5624 }
5625 
5626 /**
5627  * cik_pcie_gart_disable - gart disable
5628  *
5629  * @rdev: radeon_device pointer
5630  *
5631  * This disables all VM page tables (CIK).
5632  */
5633 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5634 {
5635 	unsigned i;
5636 
5637 	for (i = 1; i < 16; ++i) {
5638 		uint32_t reg;
5639 		if (i < 8)
5640 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5641 		else
5642 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5643 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5644 	}
5645 
5646 	/* Disable all tables */
5647 	WREG32(VM_CONTEXT0_CNTL, 0);
5648 	WREG32(VM_CONTEXT1_CNTL, 0);
5649 	/* Setup TLB control */
5650 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5651 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5652 	/* Setup L2 cache */
5653 	WREG32(VM_L2_CNTL,
5654 	       ENABLE_L2_FRAGMENT_PROCESSING |
5655 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5656 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5657 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5658 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5659 	WREG32(VM_L2_CNTL2, 0);
5660 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5661 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5662 	radeon_gart_table_vram_unpin(rdev);
5663 }
5664 
5665 /**
5666  * cik_pcie_gart_fini - vm fini callback
5667  *
5668  * @rdev: radeon_device pointer
5669  *
5670  * Tears down the driver GART/VM setup (CIK).
5671  */
5672 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5673 {
5674 	cik_pcie_gart_disable(rdev);
5675 	radeon_gart_table_vram_free(rdev);
5676 	radeon_gart_fini(rdev);
5677 }
5678 
5679 /* vm parser */
5680 /**
5681  * cik_ib_parse - vm ib_parse callback
5682  *
5683  * @rdev: radeon_device pointer
5684  * @ib: indirect buffer pointer
5685  *
5686  * CIK uses hw IB checking so this is a nop (CIK).
5687  */
5688 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5689 {
5690 	return 0;
5691 }
5692 
5693 /*
5694  * vm
5695  * VMID 0 is the physical GPU addresses as used by the kernel.
5696  * VMIDs 1-15 are used for userspace clients and are handled
5697  * by the radeon vm/hsa code.
5698  */
5699 /**
5700  * cik_vm_init - cik vm init callback
5701  *
5702  * @rdev: radeon_device pointer
5703  *
5704  * Inits cik specific vm parameters (number of VMs, base of vram for
5705  * VMIDs 1-15) (CIK).
5706  * Returns 0 for success.
5707  */
5708 int cik_vm_init(struct radeon_device *rdev)
5709 {
5710 	/*
5711 	 * number of VMs
5712 	 * VMID 0 is reserved for System
5713 	 * radeon graphics/compute will use VMIDs 1-7
5714 	 * amdkfd will use VMIDs 8-15
5715 	 */
5716 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5717 	/* base offset of vram pages */
5718 	if (rdev->flags & RADEON_IS_IGP) {
5719 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5720 		tmp <<= 22;
5721 		rdev->vm_manager.vram_base_offset = tmp;
5722 	} else
5723 		rdev->vm_manager.vram_base_offset = 0;
5724 
5725 	return 0;
5726 }
5727 
5728 /**
5729  * cik_vm_fini - cik vm fini callback
5730  *
5731  * @rdev: radeon_device pointer
5732  *
5733  * Tear down any asic specific VM setup (CIK).
5734  */
5735 void cik_vm_fini(struct radeon_device *rdev)
5736 {
5737 }
5738 
5739 /**
5740  * cik_vm_decode_fault - print human readable fault info
5741  *
5742  * @rdev: radeon_device pointer
5743  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5744  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
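 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value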
5745  *
5746  * Print human readable fault information (CIK).
5747  */
5748 static void cik_vm_decode_fault(struct radeon_device *rdev,
5749 				u32 status, u32 addr, u32 mc_client)
5750 {
5751 	u32 mc_id;
5752 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5753 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5754 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5755 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5756 
5757 	if (rdev->family == CHIP_HAWAII)
5758 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5759 	else
5760 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5761 
5762 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5763 	       protections, vmid, addr,
5764 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5765 	       block, mc_client, mc_id);
5766 }
5767 
5768 /**
5769  * cik_vm_flush - cik vm flush using the CP
5770  *
5771  * @rdev: radeon_device pointer
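 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID to flush
 * @pd_addr: page directory base address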
5772  *
5773  * Update the page table base and flush the VM TLB
5774  * using the CP (CIK).
5775  */
5776 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5777 		  unsigned vm_id, uint64_t pd_addr)
5778 {
5779 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5780 
5781 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5782 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5783 				 WRITE_DATA_DST_SEL(0)));
5784 	if (vm_id < 8) {
5785 		radeon_ring_write(ring,
5786 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5787 	} else {
5788 		radeon_ring_write(ring,
5789 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5790 	}
5791 	radeon_ring_write(ring, 0);
5792 	radeon_ring_write(ring, pd_addr >> 12);
5793 
5794 	/* update SH_MEM_* regs */
5795 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5796 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5797 				 WRITE_DATA_DST_SEL(0)));
5798 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5799 	radeon_ring_write(ring, 0);
5800 	radeon_ring_write(ring, VMID(vm_id));
5801 
5802 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5803 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5804 				 WRITE_DATA_DST_SEL(0)));
5805 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5806 	radeon_ring_write(ring, 0);
5807 
5808 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5809 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5810 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5811 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5812 
5813 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5814 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5815 				 WRITE_DATA_DST_SEL(0)));
5816 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5817 	radeon_ring_write(ring, 0);
5818 	radeon_ring_write(ring, VMID(0));
5819 
5820 	/* HDP flush */
5821 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5822 
5823 	/* bits 0-15 are the VM contexts 0-15 */
5824 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5825 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5826 				 WRITE_DATA_DST_SEL(0)));
5827 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5828 	radeon_ring_write(ring, 0);
5829 	radeon_ring_write(ring, 1 << vm_id);
5830 
5831 	/* wait for the invalidate to complete */
5832 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5833 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5834 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5835 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5836 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5837 	radeon_ring_write(ring, 0);
5838 	radeon_ring_write(ring, 0); /* ref */
5839 	radeon_ring_write(ring, 0); /* mask */
5840 	radeon_ring_write(ring, 0x20); /* poll interval */
5841 
5842 	/* compute doesn't have PFP */
5843 	if (usepfp) {
5844 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5845 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5846 		radeon_ring_write(ring, 0x0);
5847 	}
5848 }
5849 
5850 /*
5851  * RLC
5852  * The RLC is a multi-purpose microengine that handles a
5853  * variety of functions, the most important of which is
5854  * the interrupt controller.
5855  */
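/* Enable/disable the context busy/empty ("gui idle") interrupts
 * used for gfx clockgating.
 */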
5856 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5857 					  bool enable)
5858 {
5859 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5860 
5861 	if (enable)
5862 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5863 	else
5864 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5865 	WREG32(CP_INT_CNTL_RING0, tmp);
5866 }
5867 
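/* Enable/disable RLC load balancing per watt (LBPW). */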
5868 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5869 {
5870 	u32 tmp;
5871 
5872 	tmp = RREG32(RLC_LB_CNTL);
5873 	if (enable)
5874 		tmp |= LOAD_BALANCE_ENABLE;
5875 	else
5876 		tmp &= ~LOAD_BALANCE_ENABLE;
5877 	WREG32(RLC_LB_CNTL, tmp);
5878 }
5879 
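/* Wait for the RLC serdes on all SEs/SHs, and then the non-CU
 * masters, to report idle.
 */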
5880 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5881 {
5882 	u32 i, j, k;
5883 	u32 mask;
5884 
5885 	mutex_lock(&rdev->grbm_idx_mutex);
5886 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5887 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5888 			cik_select_se_sh(rdev, i, j);
5889 			for (k = 0; k < rdev->usec_timeout; k++) {
5890 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5891 					break;
5892 				udelay(1);
5893 			}
5894 		}
5895 	}
5896 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5897 	mutex_unlock(&rdev->grbm_idx_mutex);
5898 
5899 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5900 	for (k = 0; k < rdev->usec_timeout; k++) {
5901 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5902 			break;
5903 		udelay(1);
5904 	}
5905 }
5906 
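/* Write a new RLC_CNTL value (typically one previously returned
 * by cik_halt_rlc()) if it differs from the current one.
 */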
5907 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5908 {
5909 	u32 tmp;
5910 
5911 	tmp = RREG32(RLC_CNTL);
5912 	if (tmp != rlc)
5913 		WREG32(RLC_CNTL, rlc);
5914 }
5915 
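/**
 * cik_halt_rlc - halt the RLC and wait for it to idle
 *
 * @rdev: radeon_device pointer
 *
 * Disable the RLC if it is running and wait for the GPM block
 * and the serdes to go idle (CIK).
 * Returns the original RLC_CNTL value so the caller can restore
 * it later via cik_update_rlc().
 */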
5916 static u32 cik_halt_rlc(struct radeon_device *rdev)
5917 {
5918 	u32 data, orig;
5919 
5920 	orig = data = RREG32(RLC_CNTL);
5921 
5922 	if (data & RLC_ENABLE) {
5923 		u32 i;
5924 
5925 		data &= ~RLC_ENABLE;
5926 		WREG32(RLC_CNTL, data);
5927 
5928 		for (i = 0; i < rdev->usec_timeout; i++) {
5929 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5930 				break;
5931 			udelay(1);
5932 		}
5933 
5934 		cik_wait_for_rlc_serdes(rdev);
5935 	}
5936 
5937 	return orig;
5938 }
5939 
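/* Request RLC safe mode via RLC_GPR_REG2, then wait for the gfx
 * power/clock status to settle and for the request to be acked.
 * Paired with cik_exit_rlc_safe_mode().
 */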
5940 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5941 {
5942 	u32 tmp, i, mask;
5943 
5944 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5945 	WREG32(RLC_GPR_REG2, tmp);
5946 
5947 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5948 	for (i = 0; i < rdev->usec_timeout; i++) {
5949 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5950 			break;
5951 		udelay(1);
5952 	}
5953 
5954 	for (i = 0; i < rdev->usec_timeout; i++) {
5955 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5956 			break;
5957 		udelay(1);
5958 	}
5959 }
5960 
5961 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5962 {
5963 	u32 tmp;
5964 
5965 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5966 	WREG32(RLC_GPR_REG2, tmp);
5967 }
5968 
5969 /**
5970  * cik_rlc_stop - stop the RLC ME
5971  *
5972  * @rdev: radeon_device pointer
5973  *
5974  * Halt the RLC ME (MicroEngine) (CIK).
5975  */
5976 static void cik_rlc_stop(struct radeon_device *rdev)
5977 {
5978 	WREG32(RLC_CNTL, 0);
5979 
5980 	cik_enable_gui_idle_interrupt(rdev, false);
5981 
5982 	cik_wait_for_rlc_serdes(rdev);
5983 }
5984 
5985 /**
5986  * cik_rlc_start - start the RLC ME
5987  *
5988  * @rdev: radeon_device pointer
5989  *
5990  * Unhalt the RLC ME (MicroEngine) (CIK).
5991  */
5992 static void cik_rlc_start(struct radeon_device *rdev)
5993 {
5994 	WREG32(RLC_CNTL, RLC_ENABLE);
5995 
5996 	cik_enable_gui_idle_interrupt(rdev, true);
5997 
5998 	udelay(50);
5999 }
6000 
6001 /**
6002  * cik_rlc_resume - setup the RLC hw
6003  *
6004  * @rdev: radeon_device pointer
6005  *
6006  * Initialize the RLC registers, load the ucode,
6007  * and start the RLC (CIK).
6008  * Returns 0 for success, -EINVAL if the ucode is not available.
6009  */
6010 static int cik_rlc_resume(struct radeon_device *rdev)
6011 {
6012 	u32 i, size, tmp;
6013 
6014 	if (!rdev->rlc_fw)
6015 		return -EINVAL;
6016 
6017 	cik_rlc_stop(rdev);
6018 
6019 	/* disable CG */
6020 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6021 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6022 
6023 	si_rlc_reset(rdev);
6024 
6025 	cik_init_pg(rdev);
6026 
6027 	cik_init_cg(rdev);
6028 
6029 	WREG32(RLC_LB_CNTR_INIT, 0);
6030 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6031 
6032 	mutex_lock(&rdev->grbm_idx_mutex);
6033 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6034 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6035 	WREG32(RLC_LB_PARAMS, 0x00600408);
6036 	WREG32(RLC_LB_CNTL, 0x80000004);
6037 	mutex_unlock(&rdev->grbm_idx_mutex);
6038 
6039 	WREG32(RLC_MC_CNTL, 0);
6040 	WREG32(RLC_UCODE_CNTL, 0);
6041 
6042 	if (rdev->new_fw) {
6043 		const struct rlc_firmware_header_v1_0 *hdr =
6044 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6045 		const __le32 *fw_data = (const __le32 *)
6046 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6047 
6048 		radeon_ucode_print_rlc_hdr(&hdr->header);
6049 
6050 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6051 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6052 		for (i = 0; i < size; i++)
6053 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6054 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6055 	} else {
6056 		const __be32 *fw_data;
6057 
6058 		switch (rdev->family) {
6059 		case CHIP_BONAIRE:
6060 		case CHIP_HAWAII:
6061 		default:
6062 			size = BONAIRE_RLC_UCODE_SIZE;
6063 			break;
6064 		case CHIP_KAVERI:
6065 			size = KV_RLC_UCODE_SIZE;
6066 			break;
6067 		case CHIP_KABINI:
6068 			size = KB_RLC_UCODE_SIZE;
6069 			break;
6070 		case CHIP_MULLINS:
6071 			size = ML_RLC_UCODE_SIZE;
6072 			break;
6073 		}
6074 
6075 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6076 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6077 		for (i = 0; i < size; i++)
6078 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6079 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6080 	}
6081 
6082 	/* XXX - find out what chips support lbpw */
6083 	cik_enable_lbpw(rdev, false);
6084 
6085 	if (rdev->family == CHIP_BONAIRE)
6086 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6087 
6088 	cik_rlc_start(rdev);
6089 
6090 	return 0;
6091 }
6092 
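/* Enable/disable coarse grain clockgating (CGCG) and coarse grain
 * light sleep (CGLS) for gfx.  The RLC is halted while the serdes
 * are reprogrammed and restarted afterwards.
 */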
6093 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6094 {
6095 	u32 data, orig, tmp, tmp2;
6096 
6097 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6098 
6099 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6100 		cik_enable_gui_idle_interrupt(rdev, true);
6101 
6102 		tmp = cik_halt_rlc(rdev);
6103 
6104 		mutex_lock(&rdev->grbm_idx_mutex);
6105 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6106 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6107 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6108 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6109 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6110 		mutex_unlock(&rdev->grbm_idx_mutex);
6111 
6112 		cik_update_rlc(rdev, tmp);
6113 
6114 		data |= CGCG_EN | CGLS_EN;
6115 	} else {
6116 		cik_enable_gui_idle_interrupt(rdev, false);
6117 
6118 		RREG32(CB_CGTT_SCLK_CTRL);
6119 		RREG32(CB_CGTT_SCLK_CTRL);
6120 		RREG32(CB_CGTT_SCLK_CTRL);
6121 		RREG32(CB_CGTT_SCLK_CTRL);
6122 
6123 		data &= ~(CGCG_EN | CGLS_EN);
6124 	}
6125 
6126 	if (orig != data)
6127 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6128 
6129 }
6130 
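/* Enable/disable medium grain clockgating (MGCG) and the related
 * CP/RLC memory light sleep and CGTS overrides for gfx.
 */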
6131 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6132 {
6133 	u32 data, orig, tmp = 0;
6134 
6135 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6136 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6137 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6138 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6139 				data |= CP_MEM_LS_EN;
6140 				if (orig != data)
6141 					WREG32(CP_MEM_SLP_CNTL, data);
6142 			}
6143 		}
6144 
6145 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6146 		data |= 0x00000001;
6147 		data &= 0xfffffffd;
6148 		if (orig != data)
6149 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6150 
6151 		tmp = cik_halt_rlc(rdev);
6152 
6153 		mutex_lock(&rdev->grbm_idx_mutex);
6154 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6155 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6156 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6157 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6158 		WREG32(RLC_SERDES_WR_CTRL, data);
6159 		mutex_unlock(&rdev->grbm_idx_mutex);
6160 
6161 		cik_update_rlc(rdev, tmp);
6162 
6163 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6164 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6165 			data &= ~SM_MODE_MASK;
6166 			data |= SM_MODE(0x2);
6167 			data |= SM_MODE_ENABLE;
6168 			data &= ~CGTS_OVERRIDE;
6169 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6170 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6171 				data &= ~CGTS_LS_OVERRIDE;
6172 			data &= ~ON_MONITOR_ADD_MASK;
6173 			data |= ON_MONITOR_ADD_EN;
6174 			data |= ON_MONITOR_ADD(0x96);
6175 			if (orig != data)
6176 				WREG32(CGTS_SM_CTRL_REG, data);
6177 		}
6178 	} else {
6179 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6180 		data |= 0x00000003;
6181 		if (orig != data)
6182 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6183 
6184 		data = RREG32(RLC_MEM_SLP_CNTL);
6185 		if (data & RLC_MEM_LS_EN) {
6186 			data &= ~RLC_MEM_LS_EN;
6187 			WREG32(RLC_MEM_SLP_CNTL, data);
6188 		}
6189 
6190 		data = RREG32(CP_MEM_SLP_CNTL);
6191 		if (data & CP_MEM_LS_EN) {
6192 			data &= ~CP_MEM_LS_EN;
6193 			WREG32(CP_MEM_SLP_CNTL, data);
6194 		}
6195 
6196 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6197 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6198 		if (orig != data)
6199 			WREG32(CGTS_SM_CTRL_REG, data);
6200 
6201 		tmp = cik_halt_rlc(rdev);
6202 
6203 		mutex_lock(&rdev->grbm_idx_mutex);
6204 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6205 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6206 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6207 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6208 		WREG32(RLC_SERDES_WR_CTRL, data);
6209 		mutex_unlock(&rdev->grbm_idx_mutex);
6210 
6211 		cik_update_rlc(rdev, tmp);
6212 	}
6213 }
6214 
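/* Each of these MC client registers carries the same CG/LS enable
 * bits, so they are programmed as a set below.
 */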
6215 static const u32 mc_cg_registers[] =
6216 {
6217 	MC_HUB_MISC_HUB_CG,
6218 	MC_HUB_MISC_SIP_CG,
6219 	MC_HUB_MISC_VM_CG,
6220 	MC_XPB_CLK_GAT,
6221 	ATC_MISC_CG,
6222 	MC_CITF_MISC_WR_CG,
6223 	MC_CITF_MISC_RD_CG,
6224 	MC_CITF_MISC_VM_CG,
6225 	VM_L2_CG,
6226 };
6227 
6228 static void cik_enable_mc_ls(struct radeon_device *rdev,
6229 			     bool enable)
6230 {
6231 	int i;
6232 	u32 orig, data;
6233 
6234 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6235 		orig = data = RREG32(mc_cg_registers[i]);
6236 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6237 			data |= MC_LS_ENABLE;
6238 		else
6239 			data &= ~MC_LS_ENABLE;
6240 		if (data != orig)
6241 			WREG32(mc_cg_registers[i], data);
6242 	}
6243 }
6244 
6245 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6246 			       bool enable)
6247 {
6248 	int i;
6249 	u32 orig, data;
6250 
6251 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6252 		orig = data = RREG32(mc_cg_registers[i]);
6253 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6254 			data |= MC_CG_ENABLE;
6255 		else
6256 			data &= ~MC_CG_ENABLE;
6257 		if (data != orig)
6258 			WREG32(mc_cg_registers[i], data);
6259 	}
6260 }
6261 
6262 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6263 				 bool enable)
6264 {
6265 	u32 orig, data;
6266 
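	/* writing 0x100 turns on dynamic clock gating in SDMAx_CLK_CTRL;
	 * writing 0xff000000 sets the clock override bits, forcing the
	 * clocks on and effectively disabling MGCG
	 */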
6267 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6268 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6269 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6270 	} else {
6271 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6272 		data |= 0xff000000;
6273 		if (data != orig)
6274 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6275 
6276 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6277 		data |= 0xff000000;
6278 		if (data != orig)
6279 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6280 	}
6281 }
6282 
6283 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6284 				 bool enable)
6285 {
6286 	u32 orig, data;
6287 
6288 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6289 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6290 		data |= 0x100;
6291 		if (orig != data)
6292 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6293 
6294 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6295 		data |= 0x100;
6296 		if (orig != data)
6297 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6298 	} else {
6299 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6300 		data &= ~0x100;
6301 		if (orig != data)
6302 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6303 
6304 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6305 		data &= ~0x100;
6306 		if (orig != data)
6307 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6308 	}
6309 }
6310 
6311 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6312 				bool enable)
6313 {
6314 	u32 orig, data;
6315 
6316 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6317 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6318 		data = 0xfff;
6319 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6320 
6321 		orig = data = RREG32(UVD_CGC_CTRL);
6322 		data |= DCM;
6323 		if (orig != data)
6324 			WREG32(UVD_CGC_CTRL, data);
6325 	} else {
6326 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6327 		data &= ~0xfff;
6328 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6329 
6330 		orig = data = RREG32(UVD_CGC_CTRL);
6331 		data &= ~DCM;
6332 		if (orig != data)
6333 			WREG32(UVD_CGC_CTRL, data);
6334 	}
6335 }
6336 
6337 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6338 			       bool enable)
6339 {
6340 	u32 orig, data;
6341 
6342 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6343 
6344 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6345 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6346 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6347 	else
6348 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6349 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6350 
6351 	if (orig != data)
6352 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6353 }
6354 
6355 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6356 				bool enable)
6357 {
6358 	u32 orig, data;
6359 
6360 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6361 
6362 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6363 		data &= ~CLOCK_GATING_DIS;
6364 	else
6365 		data |= CLOCK_GATING_DIS;
6366 
6367 	if (orig != data)
6368 		WREG32(HDP_HOST_PATH_CNTL, data);
6369 }
6370 
6371 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6372 			      bool enable)
6373 {
6374 	u32 orig, data;
6375 
6376 	orig = data = RREG32(HDP_MEM_POWER_LS);
6377 
6378 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6379 		data |= HDP_LS_ENABLE;
6380 	else
6381 		data &= ~HDP_LS_ENABLE;
6382 
6383 	if (orig != data)
6384 		WREG32(HDP_MEM_POWER_LS, data);
6385 }
6386 
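/**
 * cik_update_cg - enable/disable clockgating for a set of blocks
 *
 * @rdev: radeon_device pointer
 * @block: mask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clockgating
 *
 * Enable or disable clockgating on the selected IP blocks; for
 * gfx, MGCG and CGCG must be toggled in the right order (CIK).
 */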
6387 void cik_update_cg(struct radeon_device *rdev,
6388 		   u32 block, bool enable)
6389 {
6390 
6391 	if (block & RADEON_CG_BLOCK_GFX) {
6392 		cik_enable_gui_idle_interrupt(rdev, false);
6393 		/* order matters! */
6394 		if (enable) {
6395 			cik_enable_mgcg(rdev, true);
6396 			cik_enable_cgcg(rdev, true);
6397 		} else {
6398 			cik_enable_cgcg(rdev, false);
6399 			cik_enable_mgcg(rdev, false);
6400 		}
6401 		cik_enable_gui_idle_interrupt(rdev, true);
6402 	}
6403 
6404 	if (block & RADEON_CG_BLOCK_MC) {
6405 		if (!(rdev->flags & RADEON_IS_IGP)) {
6406 			cik_enable_mc_mgcg(rdev, enable);
6407 			cik_enable_mc_ls(rdev, enable);
6408 		}
6409 	}
6410 
6411 	if (block & RADEON_CG_BLOCK_SDMA) {
6412 		cik_enable_sdma_mgcg(rdev, enable);
6413 		cik_enable_sdma_mgls(rdev, enable);
6414 	}
6415 
6416 	if (block & RADEON_CG_BLOCK_BIF) {
6417 		cik_enable_bif_mgls(rdev, enable);
6418 	}
6419 
6420 	if (block & RADEON_CG_BLOCK_UVD) {
6421 		if (rdev->has_uvd)
6422 			cik_enable_uvd_mgcg(rdev, enable);
6423 	}
6424 
6425 	if (block & RADEON_CG_BLOCK_HDP) {
6426 		cik_enable_hdp_mgcg(rdev, enable);
6427 		cik_enable_hdp_ls(rdev, enable);
6428 	}
6429 
6430 	if (block & RADEON_CG_BLOCK_VCE) {
6431 		vce_v2_0_enable_mgcg(rdev, enable);
6432 	}
6433 }
6434 
6435 static void cik_init_cg(struct radeon_device *rdev)
6436 {
6437 
6438 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6439 
6440 	if (rdev->has_uvd)
6441 		si_init_uvd_internal_cg(rdev);
6442 
6443 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6444 			     RADEON_CG_BLOCK_SDMA |
6445 			     RADEON_CG_BLOCK_BIF |
6446 			     RADEON_CG_BLOCK_UVD |
6447 			     RADEON_CG_BLOCK_HDP), true);
6448 }
6449 
6450 static void cik_fini_cg(struct radeon_device *rdev)
6451 {
6452 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6453 			     RADEON_CG_BLOCK_SDMA |
6454 			     RADEON_CG_BLOCK_BIF |
6455 			     RADEON_CG_BLOCK_UVD |
6456 			     RADEON_CG_BLOCK_HDP), false);
6457 
6458 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6459 }
6460 
6461 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6462 					  bool enable)
6463 {
6464 	u32 data, orig;
6465 
6466 	orig = data = RREG32(RLC_PG_CNTL);
6467 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6468 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6469 	else
6470 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6471 	if (orig != data)
6472 		WREG32(RLC_PG_CNTL, data);
6473 }
6474 
6475 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6476 					  bool enable)
6477 {
6478 	u32 data, orig;
6479 
6480 	orig = data = RREG32(RLC_PG_CNTL);
6481 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6482 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6483 	else
6484 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6485 	if (orig != data)
6486 		WREG32(RLC_PG_CNTL, data);
6487 }
6488 
6489 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6490 {
6491 	u32 data, orig;
6492 
6493 	orig = data = RREG32(RLC_PG_CNTL);
6494 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6495 		data &= ~DISABLE_CP_PG;
6496 	else
6497 		data |= DISABLE_CP_PG;
6498 	if (orig != data)
6499 		WREG32(RLC_PG_CNTL, data);
6500 }
6501 
6502 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6503 {
6504 	u32 data, orig;
6505 
6506 	orig = data = RREG32(RLC_PG_CNTL);
6507 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6508 		data &= ~DISABLE_GDS_PG;
6509 	else
6510 		data |= DISABLE_GDS_PG;
6511 	if (orig != data)
6512 		WREG32(RLC_PG_CNTL, data);
6513 }
6514 
6515 #define CP_ME_TABLE_SIZE    96
6516 #define CP_ME_TABLE_OFFSET  2048
6517 #define CP_MEC_TABLE_OFFSET 4096
6518 
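/**
 * cik_init_cp_pg_table - set up the RLC CP jump table
 *
 * @rdev: radeon_device pointer
 *
 * Copy the CE/PFP/ME/MEC (and MEC2 on KV) jump tables from the CP
 * firmware images into the cp_table buffer used by the RLC for CP
 * power gating (CIK).
 */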
6519 void cik_init_cp_pg_table(struct radeon_device *rdev)
6520 {
6521 	volatile u32 *dst_ptr;
6522 	int me, i, max_me = 4;
6523 	u32 bo_offset = 0;
6524 	u32 table_offset, table_size;
6525 
6526 	if (rdev->family == CHIP_KAVERI)
6527 		max_me = 5;
6528 
6529 	if (rdev->rlc.cp_table_ptr == NULL)
6530 		return;
6531 
6532 	/* write the cp table buffer */
6533 	dst_ptr = rdev->rlc.cp_table_ptr;
6534 	for (me = 0; me < max_me; me++) {
6535 		if (rdev->new_fw) {
6536 			const __le32 *fw_data;
6537 			const struct gfx_firmware_header_v1_0 *hdr;
6538 
6539 			if (me == 0) {
6540 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6541 				fw_data = (const __le32 *)
6542 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6543 				table_offset = le32_to_cpu(hdr->jt_offset);
6544 				table_size = le32_to_cpu(hdr->jt_size);
6545 			} else if (me == 1) {
6546 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6547 				fw_data = (const __le32 *)
6548 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6549 				table_offset = le32_to_cpu(hdr->jt_offset);
6550 				table_size = le32_to_cpu(hdr->jt_size);
6551 			} else if (me == 2) {
6552 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6553 				fw_data = (const __le32 *)
6554 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6555 				table_offset = le32_to_cpu(hdr->jt_offset);
6556 				table_size = le32_to_cpu(hdr->jt_size);
6557 			} else if (me == 3) {
6558 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6559 				fw_data = (const __le32 *)
6560 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6561 				table_offset = le32_to_cpu(hdr->jt_offset);
6562 				table_size = le32_to_cpu(hdr->jt_size);
6563 			} else {
6564 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6565 				fw_data = (const __le32 *)
6566 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6567 				table_offset = le32_to_cpu(hdr->jt_offset);
6568 				table_size = le32_to_cpu(hdr->jt_size);
6569 			}
6570 
6571 			for (i = 0; i < table_size; i ++) {
6572 				dst_ptr[bo_offset + i] =
6573 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6574 			}
6575 			bo_offset += table_size;
6576 		} else {
6577 			const __be32 *fw_data;
6578 			table_size = CP_ME_TABLE_SIZE;
6579 
6580 			if (me == 0) {
6581 				fw_data = (const __be32 *)rdev->ce_fw->data;
6582 				table_offset = CP_ME_TABLE_OFFSET;
6583 			} else if (me == 1) {
6584 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6585 				table_offset = CP_ME_TABLE_OFFSET;
6586 			} else if (me == 2) {
6587 				fw_data = (const __be32 *)rdev->me_fw->data;
6588 				table_offset = CP_ME_TABLE_OFFSET;
6589 			} else {
6590 				fw_data = (const __be32 *)rdev->mec_fw->data;
6591 				table_offset = CP_MEC_TABLE_OFFSET;
6592 			}
6593 
6594 			for (i = 0; i < table_size; i ++) {
6595 				dst_ptr[bo_offset + i] =
6596 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6597 			}
6598 			bo_offset += table_size;
6599 		}
6600 	}
6601 }
6602 
6603 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6604 				bool enable)
6605 {
6606 	u32 data, orig;
6607 
6608 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6609 		orig = data = RREG32(RLC_PG_CNTL);
6610 		data |= GFX_PG_ENABLE;
6611 		if (orig != data)
6612 			WREG32(RLC_PG_CNTL, data);
6613 
6614 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6615 		data |= AUTO_PG_EN;
6616 		if (orig != data)
6617 			WREG32(RLC_AUTO_PG_CTRL, data);
6618 	} else {
6619 		orig = data = RREG32(RLC_PG_CNTL);
6620 		data &= ~GFX_PG_ENABLE;
6621 		if (orig != data)
6622 			WREG32(RLC_PG_CNTL, data);
6623 
6624 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6625 		data &= ~AUTO_PG_EN;
6626 		if (orig != data)
6627 			WREG32(RLC_AUTO_PG_CTRL, data);
6628 
6629 		data = RREG32(DB_RENDER_CONTROL);
6630 	}
6631 }
6632 
6633 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6634 {
6635 	u32 mask = 0, tmp, tmp1;
6636 	int i;
6637 
6638 	mutex_lock(&rdev->grbm_idx_mutex);
6639 	cik_select_se_sh(rdev, se, sh);
6640 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6641 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6642 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6643 	mutex_unlock(&rdev->grbm_idx_mutex);
6644 
6645 	tmp &= 0xffff0000;
6646 
6647 	tmp |= tmp1;
6648 	tmp >>= 16;
6649 
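	/* build a mask with max_cu_per_sh low bits set,
	 * i.e. mask = (1 << max_cu_per_sh) - 1
	 */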
6650 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6651 		mask <<= 1;
6652 		mask |= 1;
6653 	}
6654 
6655 	return (~tmp) & mask;
6656 }
6657 
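/**
 * cik_init_ao_cu_mask - configure the always-on CU mask
 *
 * @rdev: radeon_device pointer
 *
 * Program the RLC with a bitmap of up to two always-on CUs per
 * SH and with the total number of active CUs (CIK).
 */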
6658 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6659 {
6660 	u32 i, j, k, active_cu_number = 0;
6661 	u32 mask, counter, cu_bitmap;
6662 	u32 tmp = 0;
6663 
6664 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6665 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6666 			mask = 1;
6667 			cu_bitmap = 0;
6668 			counter = 0;
6669 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6670 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6671 					if (counter < 2)
6672 						cu_bitmap |= mask;
6673 					counter++;
6674 				}
6675 				mask <<= 1;
6676 			}
6677 
6678 			active_cu_number += counter;
6679 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6680 		}
6681 	}
6682 
6683 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6684 
6685 	tmp = RREG32(RLC_MAX_PG_CU);
6686 	tmp &= ~MAX_PU_CU_MASK;
6687 	tmp |= MAX_PU_CU(active_cu_number);
6688 	WREG32(RLC_MAX_PG_CU, tmp);
6689 }
6690 
6691 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6692 				       bool enable)
6693 {
6694 	u32 data, orig;
6695 
6696 	orig = data = RREG32(RLC_PG_CNTL);
6697 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6698 		data |= STATIC_PER_CU_PG_ENABLE;
6699 	else
6700 		data &= ~STATIC_PER_CU_PG_ENABLE;
6701 	if (orig != data)
6702 		WREG32(RLC_PG_CNTL, data);
6703 }
6704 
6705 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6706 					bool enable)
6707 {
6708 	u32 data, orig;
6709 
6710 	orig = data = RREG32(RLC_PG_CNTL);
6711 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6712 		data |= DYN_PER_CU_PG_ENABLE;
6713 	else
6714 		data &= ~DYN_PER_CU_PG_ENABLE;
6715 	if (orig != data)
6716 		WREG32(RLC_PG_CNTL, data);
6717 }
6718 
6719 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6720 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6721 
6722 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6723 {
6724 	u32 data, orig;
6725 	u32 i;
6726 
6727 	if (rdev->rlc.cs_data) {
6728 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6729 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6730 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6731 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6732 	} else {
6733 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6734 		for (i = 0; i < 3; i++)
6735 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6736 	}
6737 	if (rdev->rlc.reg_list) {
6738 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6739 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6740 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6741 	}
6742 
6743 	orig = data = RREG32(RLC_PG_CNTL);
6744 	data |= GFX_PG_SRC;
6745 	if (orig != data)
6746 		WREG32(RLC_PG_CNTL, data);
6747 
6748 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6749 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6750 
6751 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6752 	data &= ~IDLE_POLL_COUNT_MASK;
6753 	data |= IDLE_POLL_COUNT(0x60);
6754 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6755 
6756 	data = 0x10101010;
6757 	WREG32(RLC_PG_DELAY, data);
6758 
6759 	data = RREG32(RLC_PG_DELAY_2);
6760 	data &= ~0xff;
6761 	data |= 0x3;
6762 	WREG32(RLC_PG_DELAY_2, data);
6763 
6764 	data = RREG32(RLC_AUTO_PG_CTRL);
6765 	data &= ~GRBM_REG_SGIT_MASK;
6766 	data |= GRBM_REG_SGIT(0x700);
6767 	WREG32(RLC_AUTO_PG_CTRL, data);
6768 
6769 }
6770 
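/**
 * cik_update_gfx_pg - update the gfx power gating state
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable power gating
 *
 * Update coarse grain, static per-CU and dynamic per-CU gfx
 * power gating together (CIK).
 */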
6771 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6772 {
6773 	cik_enable_gfx_cgpg(rdev, enable);
6774 	cik_enable_gfx_static_mgpg(rdev, enable);
6775 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6776 }
6777 
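/**
 * cik_get_csb_size - get the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Walk the clear state section/extent tables and return the number
 * of dwords needed for the clear state buffer (CIK).
 */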
6778 u32 cik_get_csb_size(struct radeon_device *rdev)
6779 {
6780 	u32 count = 0;
6781 	const struct cs_section_def *sect = NULL;
6782 	const struct cs_extent_def *ext = NULL;
6783 
6784 	if (rdev->rlc.cs_data == NULL)
6785 		return 0;
6786 
6787 	/* begin clear state */
6788 	count += 2;
6789 	/* context control state */
6790 	count += 3;
6791 
6792 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6793 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6794 			if (sect->id == SECT_CONTEXT)
6795 				count += 2 + ext->reg_count;
6796 			else
6797 				return 0;
6798 		}
6799 	}
6800 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6801 	count += 4;
6802 	/* end clear state */
6803 	count += 2;
6804 	/* clear state */
6805 	count += 2;
6806 
6807 	return count;
6808 }
6809 
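/**
 * cik_get_csb_buffer - fill in the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: buffer to fill (must be at least cik_get_csb_size() dwords)
 *
 * Build the PM4 packet stream that programs the golden clear state,
 * including the per-asic pa_sc_raster_config values (CIK).
 */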
6810 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6811 {
6812 	u32 count = 0, i;
6813 	const struct cs_section_def *sect = NULL;
6814 	const struct cs_extent_def *ext = NULL;
6815 
6816 	if (rdev->rlc.cs_data == NULL)
6817 		return;
6818 	if (buffer == NULL)
6819 		return;
6820 
6821 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6822 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6823 
6824 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6825 	buffer[count++] = cpu_to_le32(0x80000000);
6826 	buffer[count++] = cpu_to_le32(0x80000000);
6827 
6828 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6829 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6830 			if (sect->id == SECT_CONTEXT) {
6831 				buffer[count++] =
6832 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6833 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6834 				for (i = 0; i < ext->reg_count; i++)
6835 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6836 			} else {
6837 				return;
6838 			}
6839 		}
6840 	}
6841 
6842 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6843 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6844 	switch (rdev->family) {
6845 	case CHIP_BONAIRE:
6846 		buffer[count++] = cpu_to_le32(0x16000012);
6847 		buffer[count++] = cpu_to_le32(0x00000000);
6848 		break;
6849 	case CHIP_KAVERI:
6850 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6851 		buffer[count++] = cpu_to_le32(0x00000000);
6852 		break;
6853 	case CHIP_KABINI:
6854 	case CHIP_MULLINS:
6855 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6856 		buffer[count++] = cpu_to_le32(0x00000000);
6857 		break;
6858 	case CHIP_HAWAII:
6859 		buffer[count++] = cpu_to_le32(0x3a00161a);
6860 		buffer[count++] = cpu_to_le32(0x0000002e);
6861 		break;
6862 	default:
6863 		buffer[count++] = cpu_to_le32(0x00000000);
6864 		buffer[count++] = cpu_to_le32(0x00000000);
6865 		break;
6866 	}
6867 
6868 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6869 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6870 
6871 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6872 	buffer[count++] = cpu_to_le32(0);
6873 }
6874 
6875 static void cik_init_pg(struct radeon_device *rdev)
6876 {
6877 	if (rdev->pg_flags) {
6878 		cik_enable_sck_slowdown_on_pu(rdev, true);
6879 		cik_enable_sck_slowdown_on_pd(rdev, true);
6880 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6881 			cik_init_gfx_cgpg(rdev);
6882 			cik_enable_cp_pg(rdev, true);
6883 			cik_enable_gds_pg(rdev, true);
6884 		}
6885 		cik_init_ao_cu_mask(rdev);
6886 		cik_update_gfx_pg(rdev, true);
6887 	}
6888 }
6889 
6890 static void cik_fini_pg(struct radeon_device *rdev)
6891 {
6892 	if (rdev->pg_flags) {
6893 		cik_update_gfx_pg(rdev, false);
6894 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6895 			cik_enable_cp_pg(rdev, false);
6896 			cik_enable_gds_pg(rdev, false);
6897 		}
6898 	}
6899 }
6900 
6901 /*
6902  * Interrupts
6903  * Starting with r6xx, interrupts are handled via a ring buffer.
6904  * Ring buffers are areas of GPU accessible memory that the GPU
6905  * writes interrupt vectors into and the host reads vectors out of.
6906  * There is a rptr (read pointer) that determines where the
6907  * host is currently reading, and a wptr (write pointer)
6908  * which determines where the GPU has written.  When the
6909  * pointers are equal, the ring is idle.  When the GPU
6910  * writes vectors to the ring buffer, it increments the
6911  * wptr.  When there is an interrupt, the host then starts
6912  * fetching vectors and processing them until the pointers are
6913  * equal again, at which point it updates the rptr.
6914  */
6915 
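/*
 * A minimal sketch of the consumption loop (illustrative only; the
 * real loop in cik_irq_process() below also acks sources and
 * schedules work):
 *
 *	rptr = rdev->ih.rptr;
 *	wptr = cik_get_ih_wptr(rdev);
 *	while (rptr != wptr) {
 *		... decode the 16-byte vector at ring[rptr / 4] ...
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *		WREG32(IH_RB_RPTR, rptr);
 *	}
 *	rdev->ih.rptr = rptr;
 */
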
6916 /**
6917  * cik_enable_interrupts - Enable the interrupt ring buffer
6918  *
6919  * @rdev: radeon_device pointer
6920  *
6921  * Enable the interrupt ring buffer (CIK).
6922  */
6923 static void cik_enable_interrupts(struct radeon_device *rdev)
6924 {
6925 	u32 ih_cntl = RREG32(IH_CNTL);
6926 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6927 
6928 	ih_cntl |= ENABLE_INTR;
6929 	ih_rb_cntl |= IH_RB_ENABLE;
6930 	WREG32(IH_CNTL, ih_cntl);
6931 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6932 	rdev->ih.enabled = true;
6933 }
6934 
6935 /**
6936  * cik_disable_interrupts - Disable the interrupt ring buffer
6937  *
6938  * @rdev: radeon_device pointer
6939  *
6940  * Disable the interrupt ring buffer (CIK).
6941  */
6942 static void cik_disable_interrupts(struct radeon_device *rdev)
6943 {
6944 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6945 	u32 ih_cntl = RREG32(IH_CNTL);
6946 
6947 	ih_rb_cntl &= ~IH_RB_ENABLE;
6948 	ih_cntl &= ~ENABLE_INTR;
6949 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6950 	WREG32(IH_CNTL, ih_cntl);
6951 	/* set rptr, wptr to 0 */
6952 	WREG32(IH_RB_RPTR, 0);
6953 	WREG32(IH_RB_WPTR, 0);
6954 	rdev->ih.enabled = false;
6955 	rdev->ih.rptr = 0;
6956 }
6957 
6958 /**
6959  * cik_disable_interrupt_state - Disable all interrupt sources
6960  *
6961  * @rdev: radeon_device pointer
6962  *
6963  * Clear all interrupt enable bits used by the driver (CIK).
6964  */
6965 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6966 {
6967 	u32 tmp;
6968 
6969 	/* gfx ring */
6970 	tmp = RREG32(CP_INT_CNTL_RING0) &
6971 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6972 	WREG32(CP_INT_CNTL_RING0, tmp);
6973 	/* sdma */
6974 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6975 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6976 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6977 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6978 	/* compute queues */
6979 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6980 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6981 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6982 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6983 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6984 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6985 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6986 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6987 	/* grbm */
6988 	WREG32(GRBM_INT_CNTL, 0);
6989 	/* SRBM */
6990 	WREG32(SRBM_INT_CNTL, 0);
6991 	/* vline/vblank, etc. */
6992 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6993 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6994 	if (rdev->num_crtc >= 4) {
6995 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6996 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6997 	}
6998 	if (rdev->num_crtc >= 6) {
6999 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7000 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7001 	}
7002 	/* pflip */
7003 	if (rdev->num_crtc >= 2) {
7004 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7005 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7006 	}
7007 	if (rdev->num_crtc >= 4) {
7008 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7009 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7010 	}
7011 	if (rdev->num_crtc >= 6) {
7012 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7013 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7014 	}
7015 
7016 	/* dac hotplug */
7017 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7018 
7019 	/* digital hotplug */
7020 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7021 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7022 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7023 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7024 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7025 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7026 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7027 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7028 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7029 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7030 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7031 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7032 
7033 }
7034 
7035 /**
7036  * cik_irq_init - init and enable the interrupt ring
7037  *
7038  * @rdev: radeon_device pointer
7039  *
7040  * Allocate a ring buffer for the interrupt controller,
7041  * enable the RLC, disable interrupts, enable the IH
7042  * ring buffer and enable it (CIK).
7043  * Called at device load and resume.
7044  * Returns 0 for success, errors for failure.
7045  */
7046 static int cik_irq_init(struct radeon_device *rdev)
7047 {
7048 	int ret = 0;
7049 	int rb_bufsz;
7050 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7051 
7052 	/* allocate ring */
7053 	ret = r600_ih_ring_alloc(rdev);
7054 	if (ret)
7055 		return ret;
7056 
7057 	/* disable irqs */
7058 	cik_disable_interrupts(rdev);
7059 
7060 	/* init rlc */
7061 	ret = cik_rlc_resume(rdev);
7062 	if (ret) {
7063 		r600_ih_ring_fini(rdev);
7064 		return ret;
7065 	}
7066 
7067 	/* setup interrupt control */
7068 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7069 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7070 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7071 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7072 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7073 	 */
7074 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7075 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7076 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7077 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7078 
7079 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
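	/* the ring size field is log2 of the ring size in dwords, e.g.
	 * a 64KB ring gives rb_bufsz = order_base_2(65536 / 4) = 14
	 */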
7080 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7081 
7082 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7083 		      IH_WPTR_OVERFLOW_CLEAR |
7084 		      (rb_bufsz << 1));
7085 
7086 	if (rdev->wb.enabled)
7087 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7088 
7089 	/* set the writeback address whether it's enabled or not */
7090 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7091 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7092 
7093 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7094 
7095 	/* set rptr, wptr to 0 */
7096 	WREG32(IH_RB_RPTR, 0);
7097 	WREG32(IH_RB_WPTR, 0);
7098 
7099 	/* Default settings for IH_CNTL (disabled at first) */
7100 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7101 	/* RPTR_REARM only works if msi's are enabled */
7102 	if (rdev->msi_enabled)
7103 		ih_cntl |= RPTR_REARM;
7104 	WREG32(IH_CNTL, ih_cntl);
7105 
7106 	/* force the active interrupt state to all disabled */
7107 	cik_disable_interrupt_state(rdev);
7108 
7109 	pci_set_master(rdev->pdev);
7110 
7111 	/* enable irqs */
7112 	cik_enable_interrupts(rdev);
7113 
7114 	return ret;
7115 }
7116 
7117 /**
7118  * cik_irq_set - enable/disable interrupt sources
7119  *
7120  * @rdev: radeon_device pointer
7121  *
7122  * Enable interrupt sources on the GPU (vblanks, hpd,
7123  * etc.) (CIK).
7124  * Returns 0 for success, errors for failure.
7125  */
7126 int cik_irq_set(struct radeon_device *rdev)
7127 {
7128 	u32 cp_int_cntl;
7129 	u32 cp_m1p0;
7130 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7131 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7132 	u32 grbm_int_cntl = 0;
7133 	u32 dma_cntl, dma_cntl1;
7134 
7135 	if (!rdev->irq.installed) {
7136 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7137 		return -EINVAL;
7138 	}
7139 	/* don't enable anything if the ih is disabled */
7140 	if (!rdev->ih.enabled) {
7141 		cik_disable_interrupts(rdev);
7142 		/* force the active interrupt state to all disabled */
7143 		cik_disable_interrupt_state(rdev);
7144 		return 0;
7145 	}
7146 
7147 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7148 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7149 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7150 
7151 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7152 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7153 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7154 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7155 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7156 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7157 
7158 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7159 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7160 
7161 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7162 
7163 	/* enable CP interrupts on all rings */
7164 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7165 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7166 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7167 	}
7168 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7169 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7170 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7171 		if (ring->me == 1) {
7172 			switch (ring->pipe) {
7173 			case 0:
7174 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7175 				break;
7176 			default:
7177 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7178 				break;
7179 			}
7180 		} else {
7181 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7182 		}
7183 	}
7184 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7185 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7186 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7187 		if (ring->me == 1) {
7188 			switch (ring->pipe) {
7189 			case 0:
7190 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7191 				break;
7192 			default:
7193 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7194 				break;
7195 			}
7196 		} else {
7197 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7198 		}
7199 	}
7200 
7201 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7202 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7203 		dma_cntl |= TRAP_ENABLE;
7204 	}
7205 
7206 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7207 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7208 		dma_cntl1 |= TRAP_ENABLE;
7209 	}
7210 
7211 	if (rdev->irq.crtc_vblank_int[0] ||
7212 	    atomic_read(&rdev->irq.pflip[0])) {
7213 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7214 		crtc1 |= VBLANK_INTERRUPT_MASK;
7215 	}
7216 	if (rdev->irq.crtc_vblank_int[1] ||
7217 	    atomic_read(&rdev->irq.pflip[1])) {
7218 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7219 		crtc2 |= VBLANK_INTERRUPT_MASK;
7220 	}
7221 	if (rdev->irq.crtc_vblank_int[2] ||
7222 	    atomic_read(&rdev->irq.pflip[2])) {
7223 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7224 		crtc3 |= VBLANK_INTERRUPT_MASK;
7225 	}
7226 	if (rdev->irq.crtc_vblank_int[3] ||
7227 	    atomic_read(&rdev->irq.pflip[3])) {
7228 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7229 		crtc4 |= VBLANK_INTERRUPT_MASK;
7230 	}
7231 	if (rdev->irq.crtc_vblank_int[4] ||
7232 	    atomic_read(&rdev->irq.pflip[4])) {
7233 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7234 		crtc5 |= VBLANK_INTERRUPT_MASK;
7235 	}
7236 	if (rdev->irq.crtc_vblank_int[5] ||
7237 	    atomic_read(&rdev->irq.pflip[5])) {
7238 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7239 		crtc6 |= VBLANK_INTERRUPT_MASK;
7240 	}
7241 	if (rdev->irq.hpd[0]) {
7242 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7243 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7244 	}
7245 	if (rdev->irq.hpd[1]) {
7246 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7247 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7248 	}
7249 	if (rdev->irq.hpd[2]) {
7250 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7251 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7252 	}
7253 	if (rdev->irq.hpd[3]) {
7254 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7255 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7256 	}
7257 	if (rdev->irq.hpd[4]) {
7258 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7259 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7260 	}
7261 	if (rdev->irq.hpd[5]) {
7262 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7263 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7264 	}
7265 
7266 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7267 
7268 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7269 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7270 
7271 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7272 
7273 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7274 
7275 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7276 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7277 	if (rdev->num_crtc >= 4) {
7278 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7279 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7280 	}
7281 	if (rdev->num_crtc >= 6) {
7282 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7283 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7284 	}
7285 
7286 	if (rdev->num_crtc >= 2) {
7287 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7288 		       GRPH_PFLIP_INT_MASK);
7289 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7290 		       GRPH_PFLIP_INT_MASK);
7291 	}
7292 	if (rdev->num_crtc >= 4) {
7293 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7294 		       GRPH_PFLIP_INT_MASK);
7295 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7296 		       GRPH_PFLIP_INT_MASK);
7297 	}
7298 	if (rdev->num_crtc >= 6) {
7299 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7300 		       GRPH_PFLIP_INT_MASK);
7301 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7302 		       GRPH_PFLIP_INT_MASK);
7303 	}
7304 
7305 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7306 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7307 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7308 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7309 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7310 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7311 
7312 	/* posting read */
7313 	RREG32(SRBM_STATUS);
7314 
7315 	return 0;
7316 }
7317 
7318 /**
7319  * cik_irq_ack - ack interrupt sources
7320  *
7321  * @rdev: radeon_device pointer
7322  *
7323  * Ack interrupt sources on the GPU (vblanks, hpd,
7324  * etc.) (CIK).  Certain interrupt sources are sw
7325  * generated and do not require an explicit ack.
7326  */
7327 static inline void cik_irq_ack(struct radeon_device *rdev)
7328 {
7329 	u32 tmp;
7330 
7331 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7332 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7333 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7334 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7335 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7336 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7337 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7338 
7339 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7340 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7341 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7342 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7343 	if (rdev->num_crtc >= 4) {
7344 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7345 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7346 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7347 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7348 	}
7349 	if (rdev->num_crtc >= 6) {
7350 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7351 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7352 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7353 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7354 	}
7355 
7356 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7357 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7358 		       GRPH_PFLIP_INT_CLEAR);
7359 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7360 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7361 		       GRPH_PFLIP_INT_CLEAR);
7362 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7363 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7364 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7365 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7366 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7367 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7368 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7369 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7370 
7371 	if (rdev->num_crtc >= 4) {
7372 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7373 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7374 			       GRPH_PFLIP_INT_CLEAR);
7375 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7376 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7377 			       GRPH_PFLIP_INT_CLEAR);
7378 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7379 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7380 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7381 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7382 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7383 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7384 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7385 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7386 	}
7387 
7388 	if (rdev->num_crtc >= 6) {
7389 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7390 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7391 			       GRPH_PFLIP_INT_CLEAR);
7392 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7393 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7394 			       GRPH_PFLIP_INT_CLEAR);
7395 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7396 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7397 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7398 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7399 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7400 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7401 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7402 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7403 	}
7404 
7405 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7406 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7407 		tmp |= DC_HPDx_INT_ACK;
7408 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7409 	}
7410 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7411 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7412 		tmp |= DC_HPDx_INT_ACK;
7413 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7414 	}
7415 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7416 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7417 		tmp |= DC_HPDx_INT_ACK;
7418 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7419 	}
7420 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7421 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7422 		tmp |= DC_HPDx_INT_ACK;
7423 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7424 	}
7425 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7426 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7427 		tmp |= DC_HPDx_INT_ACK;
7428 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7429 	}
7430 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7431 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7432 		tmp |= DC_HPDx_INT_ACK;
7433 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7434 	}
7435 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7436 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7437 		tmp |= DC_HPDx_RX_INT_ACK;
7438 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7439 	}
7440 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7441 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7442 		tmp |= DC_HPDx_RX_INT_ACK;
7443 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7444 	}
7445 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7446 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7447 		tmp |= DC_HPDx_RX_INT_ACK;
7448 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7449 	}
7450 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7451 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7452 		tmp |= DC_HPDx_RX_INT_ACK;
7453 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7454 	}
7455 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7456 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7457 		tmp |= DC_HPDx_RX_INT_ACK;
7458 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7459 	}
7460 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7461 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7462 		tmp |= DC_HPDx_RX_INT_ACK;
7463 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7464 	}
7465 }
7466 
7467 /**
7468  * cik_irq_disable - disable interrupts
7469  *
7470  * @rdev: radeon_device pointer
7471  *
7472  * Disable interrupts on the hw (CIK).
7473  */
7474 static void cik_irq_disable(struct radeon_device *rdev)
7475 {
7476 	cik_disable_interrupts(rdev);
7477 	/* Wait and acknowledge irq */
7478 	mdelay(1);
7479 	cik_irq_ack(rdev);
7480 	cik_disable_interrupt_state(rdev);
7481 }
7482 
7483 /**
7484  * cik_irq_suspend - disable interrupts for suspend
7485  *
7486  * @rdev: radeon_device pointer
7487  *
7488  * Disable interrupts and stop the RLC (CIK).
7489  * Used for suspend.
7490  */
7491 static void cik_irq_suspend(struct radeon_device *rdev)
7492 {
7493 	cik_irq_disable(rdev);
7494 	cik_rlc_stop(rdev);
7495 }
7496 
7497 /**
7498  * cik_irq_fini - tear down interrupt support
7499  *
7500  * @rdev: radeon_device pointer
7501  *
7502  * Disable interrupts on the hw and free the IH ring
7503  * buffer (CIK).
7504  * Used for driver unload.
7505  */
7506 static void cik_irq_fini(struct radeon_device *rdev)
7507 {
7508 	cik_irq_suspend(rdev);
7509 	r600_ih_ring_fini(rdev);
7510 }
7511 
7512 /**
7513  * cik_get_ih_wptr - get the IH ring buffer wptr
7514  *
7515  * @rdev: radeon_device pointer
7516  *
7517  * Get the IH ring buffer wptr from either the register
7518  * or the writeback memory buffer (CIK).  Also check for
7519  * ring buffer overflow and deal with it.
7520  * Used by cik_irq_process().
7521  * Returns the value of the wptr.
7522  */
7523 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7524 {
7525 	u32 wptr, tmp;
7526 
7527 	if (rdev->wb.enabled)
7528 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7529 	else
7530 		wptr = RREG32(IH_RB_WPTR);
7531 
7532 	if (wptr & RB_OVERFLOW) {
7533 		wptr &= ~RB_OVERFLOW;
7534 		/* When a ring buffer overflow happens, start parsing interrupts
7535 		 * from the last vector that was not overwritten (wptr + 16).
7536 		 * Hopefully this should allow us to catch up.
7537 		 */
7538 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7539 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7540 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7541 		tmp = RREG32(IH_RB_CNTL);
7542 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7543 		WREG32(IH_RB_CNTL, tmp);
7544 	}
7545 	return (wptr & rdev->ih.ptr_mask);
7546 }
7547 
7548 /*        CIK IV Ring
7549  * Each IV ring entry is 128 bits:
7550  * [7:0]    - interrupt source id
7551  * [31:8]   - reserved
7552  * [59:32]  - interrupt source data
7553  * [63:60]  - reserved
7554  * [71:64]  - RINGID
7555  *            CP:
7556  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7557  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7558  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7559  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7560  *            PIPE_ID - ME0 0=3D
7561  *                    - ME1&2 compute dispatcher (4 pipes each)
7562  *            SDMA:
7563  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7564  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7565  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7566  * [79:72]  - VMID
7567  * [95:80]  - PASID
7568  * [127:96] - reserved
7569  */
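/*
 * For example, the decode in cik_irq_process() below pulls the fields
 * out of one 16-byte entry at byte offset rptr like this (illustrative
 * sketch of the code that follows):
 *
 *	ring_index = rptr / 4;
 *	src_id   = le32_to_cpu(ih.ring[ring_index]) & 0xff;
 *	src_data = le32_to_cpu(ih.ring[ring_index + 1]) & 0xfffffff;
 *	ring_id  = le32_to_cpu(ih.ring[ring_index + 2]) & 0xff;
 *	me_id    = (ring_id & 0x60) >> 5;
 *	pipe_id  = (ring_id & 0x18) >> 3;
 *	queue_id = ring_id & 0x7;
 */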
7570 /**
7571  * cik_irq_process - interrupt handler
7572  *
7573  * @rdev: radeon_device pointer
7574  *
7575  * Interrupt handler (CIK).  Walk the IH ring,
7576  * ack interrupts and schedule work to handle
7577  * interrupt events.
7578  * Returns irq process return code.
7579  */
7580 int cik_irq_process(struct radeon_device *rdev)
7581 {
7582 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7583 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7584 	u32 wptr;
7585 	u32 rptr;
7586 	u32 src_id, src_data, ring_id;
7587 	u8 me_id, pipe_id, queue_id;
7588 	u32 ring_index;
7589 	bool queue_hotplug = false;
7590 	bool queue_dp = false;
7591 	bool queue_reset = false;
7592 	u32 addr, status, mc_client;
7593 	bool queue_thermal = false;
7594 
7595 	if (!rdev->ih.enabled || rdev->shutdown)
7596 		return IRQ_NONE;
7597 
7598 	wptr = cik_get_ih_wptr(rdev);
7599 
7600 restart_ih:
7601 	/* is somebody else already processing irqs? */
7602 	if (atomic_xchg(&rdev->ih.lock, 1))
7603 		return IRQ_NONE;
7604 
7605 	rptr = rdev->ih.rptr;
7606 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7607 
7608 	/* Order reading of wptr vs. reading of IH ring data */
7609 	rmb();
7610 
7611 	/* display interrupts */
7612 	cik_irq_ack(rdev);
7613 
7614 	while (rptr != wptr) {
7615 		/* wptr/rptr are in bytes! */
7616 		ring_index = rptr / 4;
7617 
7618 		radeon_kfd_interrupt(rdev,
7619 				(const void *) &rdev->ih.ring[ring_index]);
7620 
7621 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7622 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7623 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7624 
7625 		switch (src_id) {
7626 		case 1: /* D1 vblank/vline */
7627 			switch (src_data) {
7628 			case 0: /* D1 vblank */
7629 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7630 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631 
7632 				if (rdev->irq.crtc_vblank_int[0]) {
7633 					drm_handle_vblank(rdev->ddev, 0);
7634 					rdev->pm.vblank_sync = true;
7635 					wake_up(&rdev->irq.vblank_queue);
7636 				}
7637 				if (atomic_read(&rdev->irq.pflip[0]))
7638 					radeon_crtc_handle_vblank(rdev, 0);
7639 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7640 				DRM_DEBUG("IH: D1 vblank\n");
7641 
7642 				break;
7643 			case 1: /* D1 vline */
7644 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7645 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646 
7647 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7648 				DRM_DEBUG("IH: D1 vline\n");
7649 
7650 				break;
7651 			default:
7652 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7653 				break;
7654 			}
7655 			break;
7656 		case 2: /* D2 vblank/vline */
7657 			switch (src_data) {
7658 			case 0: /* D2 vblank */
7659 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7660 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661 
7662 				if (rdev->irq.crtc_vblank_int[1]) {
7663 					drm_handle_vblank(rdev->ddev, 1);
7664 					rdev->pm.vblank_sync = true;
7665 					wake_up(&rdev->irq.vblank_queue);
7666 				}
7667 				if (atomic_read(&rdev->irq.pflip[1]))
7668 					radeon_crtc_handle_vblank(rdev, 1);
7669 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7670 				DRM_DEBUG("IH: D2 vblank\n");
7671 
7672 				break;
7673 			case 1: /* D2 vline */
7674 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7675 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676 
7677 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7678 				DRM_DEBUG("IH: D2 vline\n");
7679 
7680 				break;
7681 			default:
7682 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7683 				break;
7684 			}
7685 			break;
7686 		case 3: /* D3 vblank/vline */
7687 			switch (src_data) {
7688 			case 0: /* D3 vblank */
7689 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7690 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691 
7692 				if (rdev->irq.crtc_vblank_int[2]) {
7693 					drm_handle_vblank(rdev->ddev, 2);
7694 					rdev->pm.vblank_sync = true;
7695 					wake_up(&rdev->irq.vblank_queue);
7696 				}
7697 				if (atomic_read(&rdev->irq.pflip[2]))
7698 					radeon_crtc_handle_vblank(rdev, 2);
7699 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7700 				DRM_DEBUG("IH: D3 vblank\n");
7701 
7702 				break;
7703 			case 1: /* D3 vline */
7704 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7705 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706 
7707 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7708 				DRM_DEBUG("IH: D3 vline\n");
7709 
7710 				break;
7711 			default:
7712 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7713 				break;
7714 			}
7715 			break;
7716 		case 4: /* D4 vblank/vline */
7717 			switch (src_data) {
7718 			case 0: /* D4 vblank */
7719 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7720 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721 
7722 				if (rdev->irq.crtc_vblank_int[3]) {
7723 					drm_handle_vblank(rdev->ddev, 3);
7724 					rdev->pm.vblank_sync = true;
7725 					wake_up(&rdev->irq.vblank_queue);
7726 				}
7727 				if (atomic_read(&rdev->irq.pflip[3]))
7728 					radeon_crtc_handle_vblank(rdev, 3);
7729 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7730 				DRM_DEBUG("IH: D4 vblank\n");
7731 
7732 				break;
7733 			case 1: /* D4 vline */
7734 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7735 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736 
7737 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7738 				DRM_DEBUG("IH: D4 vline\n");
7739 
7740 				break;
7741 			default:
7742 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7743 				break;
7744 			}
7745 			break;
7746 		case 5: /* D5 vblank/vline */
7747 			switch (src_data) {
7748 			case 0: /* D5 vblank */
7749 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7750 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751 
7752 				if (rdev->irq.crtc_vblank_int[4]) {
7753 					drm_handle_vblank(rdev->ddev, 4);
7754 					rdev->pm.vblank_sync = true;
7755 					wake_up(&rdev->irq.vblank_queue);
7756 				}
7757 				if (atomic_read(&rdev->irq.pflip[4]))
7758 					radeon_crtc_handle_vblank(rdev, 4);
7759 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7760 				DRM_DEBUG("IH: D5 vblank\n");
7761 
7762 				break;
7763 			case 1: /* D5 vline */
7764 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7765 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7766 
7767 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7768 				DRM_DEBUG("IH: D5 vline\n");
7769 
7770 				break;
7771 			default:
7772 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7773 				break;
7774 			}
7775 			break;
7776 		case 6: /* D6 vblank/vline */
7777 			switch (src_data) {
7778 			case 0: /* D6 vblank */
7779 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7780 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7781 
7782 				if (rdev->irq.crtc_vblank_int[5]) {
7783 					drm_handle_vblank(rdev->ddev, 5);
7784 					rdev->pm.vblank_sync = true;
7785 					wake_up(&rdev->irq.vblank_queue);
7786 				}
7787 				if (atomic_read(&rdev->irq.pflip[5]))
7788 					radeon_crtc_handle_vblank(rdev, 5);
7789 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7790 				DRM_DEBUG("IH: D6 vblank\n");
7791 
7792 				break;
7793 			case 1: /* D6 vline */
7794 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7795 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7796 
7797 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7798 				DRM_DEBUG("IH: D6 vline\n");
7799 
7800 				break;
7801 			default:
7802 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7803 				break;
7804 			}
7805 			break;
7806 		case 8: /* D1 page flip */
7807 		case 10: /* D2 page flip */
7808 		case 12: /* D3 page flip */
7809 		case 14: /* D4 page flip */
7810 		case 16: /* D5 page flip */
7811 		case 18: /* D6 page flip */
7812 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7813 			if (radeon_use_pflipirq > 0)
7814 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7815 			break;
7816 		case 42: /* HPD hotplug */
7817 			switch (src_data) {
7818 			case 0:
7819 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7820 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821 
7822 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7823 				queue_hotplug = true;
7824 				DRM_DEBUG("IH: HPD1\n");
7825 
7826 				break;
7827 			case 1:
7828 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7829 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830 
7831 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7832 				queue_hotplug = true;
7833 				DRM_DEBUG("IH: HPD2\n");
7834 
7835 				break;
7836 			case 2:
7837 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7838 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839 
7840 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7841 				queue_hotplug = true;
7842 				DRM_DEBUG("IH: HPD3\n");
7843 
7844 				break;
7845 			case 3:
7846 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7847 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848 
7849 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7850 				queue_hotplug = true;
7851 				DRM_DEBUG("IH: HPD4\n");
7852 
7853 				break;
7854 			case 4:
7855 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7856 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857 
7858 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7859 				queue_hotplug = true;
7860 				DRM_DEBUG("IH: HPD5\n");
7861 
7862 				break;
7863 			case 5:
7864 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7865 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866 
7867 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7868 				queue_hotplug = true;
7869 				DRM_DEBUG("IH: HPD6\n");
7870 
7871 				break;
7872 			case 6:
7873 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7874 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875 
7876 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7877 				queue_dp = true;
7878 				DRM_DEBUG("IH: HPD_RX 1\n");
7879 
7880 				break;
7881 			case 7:
7882 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7883 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7884 
7885 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7886 				queue_dp = true;
7887 				DRM_DEBUG("IH: HPD_RX 2\n");
7888 
7889 				break;
7890 			case 8:
7891 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7892 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7893 
7894 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7895 				queue_dp = true;
7896 				DRM_DEBUG("IH: HPD_RX 3\n");
7897 
7898 				break;
7899 			case 9:
7900 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7901 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7902 
7903 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7904 				queue_dp = true;
7905 				DRM_DEBUG("IH: HPD_RX 4\n");
7906 
7907 				break;
7908 			case 10:
7909 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7910 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7911 
7912 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7913 				queue_dp = true;
7914 				DRM_DEBUG("IH: HPD_RX 5\n");
7915 
7916 				break;
7917 			case 11:
7918 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7919 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7920 
7921 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7922 				queue_dp = true;
7923 				DRM_DEBUG("IH: HPD_RX 6\n");
7924 
7925 				break;
7926 			default:
7927 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7928 				break;
7929 			}
7930 			break;
7931 		case 96:
7932 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7933 			WREG32(SRBM_INT_ACK, 0x1);
7934 			break;
7935 		case 124: /* UVD */
7936 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7937 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7938 			break;
7939 		case 146:
7940 		case 147:
7941 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7942 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7943 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7944 			/* reset addr and status */
7945 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7946 			if (addr == 0x0 && status == 0x0)
7947 				break;
7948 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7949 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7950 				addr);
7951 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7952 				status);
7953 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7954 			break;
7955 		case 167: /* VCE */
7956 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7957 			switch (src_data) {
7958 			case 0:
7959 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7960 				break;
7961 			case 1:
7962 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7963 				break;
7964 			default:
7965 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7966 				break;
7967 			}
7968 			break;
7969 		case 176: /* GFX RB CP_INT */
7970 		case 177: /* GFX IB CP_INT */
7971 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7972 			break;
7973 		case 181: /* CP EOP event */
7974 			DRM_DEBUG("IH: CP EOP\n");
7975 			/* XXX check the bitfield order! */
7976 			me_id = (ring_id & 0x60) >> 5;
7977 			pipe_id = (ring_id & 0x18) >> 3;
7978 			queue_id = (ring_id & 0x7) >> 0;
7979 			switch (me_id) {
7980 			case 0:
7981 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7982 				break;
7983 			case 1:
7984 			case 2:
7985 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7986 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7987 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7988 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7989 				break;
7990 			}
7991 			break;
7992 		case 184: /* CP Privileged reg access */
7993 			DRM_ERROR("Illegal register access in command stream\n");
7994 			/* XXX check the bitfield order! */
7995 			me_id = (ring_id & 0x60) >> 5;
7996 			pipe_id = (ring_id & 0x18) >> 3;
7997 			queue_id = (ring_id & 0x7) >> 0;
7998 			switch (me_id) {
7999 			case 0:
8000 				/* This results in a full GPU reset, but all we need to do is soft
8001 				 * reset the CP for gfx
8002 				 */
8003 				queue_reset = true;
8004 				break;
8005 			case 1:
8006 				/* XXX compute */
8007 				queue_reset = true;
8008 				break;
8009 			case 2:
8010 				/* XXX compute */
8011 				queue_reset = true;
8012 				break;
8013 			}
8014 			break;
8015 		case 185: /* CP Privileged inst */
8016 			DRM_ERROR("Illegal instruction in command stream\n");
8017 			/* XXX check the bitfield order! */
8018 			me_id = (ring_id & 0x60) >> 5;
8019 			pipe_id = (ring_id & 0x18) >> 3;
8020 			queue_id = (ring_id & 0x7) >> 0;
8021 			switch (me_id) {
8022 			case 0:
8023 				/* This results in a full GPU reset, but all we need to do is soft
8024 				 * reset the CP for gfx
8025 				 */
8026 				queue_reset = true;
8027 				break;
8028 			case 1:
8029 				/* XXX compute */
8030 				queue_reset = true;
8031 				break;
8032 			case 2:
8033 				/* XXX compute */
8034 				queue_reset = true;
8035 				break;
8036 			}
8037 			break;
8038 		case 224: /* SDMA trap event */
8039 			/* XXX check the bitfield order! */
8040 			me_id = (ring_id & 0x3) >> 0;
8041 			queue_id = (ring_id & 0xc) >> 2;
8042 			DRM_DEBUG("IH: SDMA trap\n");
8043 			switch (me_id) {
8044 			case 0:
8045 				switch (queue_id) {
8046 				case 0:
8047 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8048 					break;
8049 				case 1:
8050 					/* XXX compute */
8051 					break;
8052 				case 2:
8053 					/* XXX compute */
8054 					break;
8055 				}
8056 				break;
8057 			case 1:
8058 				switch (queue_id) {
8059 				case 0:
8060 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8061 					break;
8062 				case 1:
8063 					/* XXX compute */
8064 					break;
8065 				case 2:
8066 					/* XXX compute */
8067 					break;
8068 				}
8069 				break;
8070 			}
8071 			break;
8072 		case 230: /* thermal low to high */
8073 			DRM_DEBUG("IH: thermal low to high\n");
8074 			rdev->pm.dpm.thermal.high_to_low = false;
8075 			queue_thermal = true;
8076 			break;
8077 		case 231: /* thermal high to low */
8078 			DRM_DEBUG("IH: thermal high to low\n");
8079 			rdev->pm.dpm.thermal.high_to_low = true;
8080 			queue_thermal = true;
8081 			break;
8082 		case 233: /* GUI IDLE */
8083 			DRM_DEBUG("IH: GUI idle\n");
8084 			break;
8085 		case 241: /* SDMA Privileged inst */
8086 		case 247: /* SDMA Privileged inst */
8087 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8088 			/* XXX check the bitfield order! */
8089 			me_id = (ring_id & 0x3) >> 0;
8090 			queue_id = (ring_id & 0xc) >> 2;
8091 			switch (me_id) {
8092 			case 0:
8093 				switch (queue_id) {
8094 				case 0:
8095 					queue_reset = true;
8096 					break;
8097 				case 1:
8098 					/* XXX compute */
8099 					queue_reset = true;
8100 					break;
8101 				case 2:
8102 					/* XXX compute */
8103 					queue_reset = true;
8104 					break;
8105 				}
8106 				break;
8107 			case 1:
8108 				switch (queue_id) {
8109 				case 0:
8110 					queue_reset = true;
8111 					break;
8112 				case 1:
8113 					/* XXX compute */
8114 					queue_reset = true;
8115 					break;
8116 				case 2:
8117 					/* XXX compute */
8118 					queue_reset = true;
8119 					break;
8120 				}
8121 				break;
8122 			}
8123 			break;
8124 		default:
8125 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8126 			break;
8127 		}
8128 
8129 		/* wptr/rptr are in bytes! */
8130 		rptr += 16;
8131 		rptr &= rdev->ih.ptr_mask;
8132 		WREG32(IH_RB_RPTR, rptr);
8133 	}
8134 	if (queue_dp)
8135 		schedule_work(&rdev->dp_work);
8136 	if (queue_hotplug)
8137 		schedule_delayed_work(&rdev->hotplug_work, 0);
8138 	if (queue_reset) {
8139 		rdev->needs_reset = true;
8140 		wake_up_all(&rdev->fence_queue);
8141 	}
8142 	if (queue_thermal)
8143 		schedule_work(&rdev->pm.dpm.thermal.work);
8144 	rdev->ih.rptr = rptr;
8145 	atomic_set(&rdev->ih.lock, 0);
8146 
8147 	/* make sure wptr hasn't changed while processing */
8148 	wptr = cik_get_ih_wptr(rdev);
8149 	if (wptr != rptr)
8150 		goto restart_ih;
8151 
8152 	return IRQ_HANDLED;
8153 }
8154 
8155 /*
8156  * startup/shutdown callbacks
8157  */
8158 static void cik_uvd_init(struct radeon_device *rdev)
8159 {
8160 	int r;
8161 
8162 	if (!rdev->has_uvd)
8163 		return;
8164 
8165 	r = radeon_uvd_init(rdev);
8166 	if (r) {
8167 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8168 		/*
8169 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
8170 		 * cik_uvd_start() fail early, so nothing happens there.
8171 		 * It is therefore pointless to go through that code, which
8172 		 * is why we disable uvd here.
8173 		 */
8174 		rdev->has_uvd = 0;
8175 		return;
8176 	}
8177 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8178 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8179 }
8180 
8181 static void cik_uvd_start(struct radeon_device *rdev)
8182 {
8183 	int r;
8184 
8185 	if (!rdev->has_uvd)
8186 		return;
8187 
8188 	r = radeon_uvd_resume(rdev);
8189 	if (r) {
8190 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8191 		goto error;
8192 	}
8193 	r = uvd_v4_2_resume(rdev);
8194 	if (r) {
8195 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8196 		goto error;
8197 	}
8198 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8199 	if (r) {
8200 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8201 		goto error;
8202 	}
8203 	return;
8204 
8205 error:
8206 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8207 }
8208 
8209 static void cik_uvd_resume(struct radeon_device *rdev)
8210 {
8211 	struct radeon_ring *ring;
8212 	int r;
8213 
8214 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8215 		return;
8216 
8217 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8218 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, RADEON_CP_PACKET2);
8219 	if (r) {
8220 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8221 		return;
8222 	}
8223 	r = uvd_v1_0_init(rdev);
8224 	if (r) {
8225 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8226 		return;
8227 	}
8228 }
8229 
8230 static void cik_vce_init(struct radeon_device *rdev)
8231 {
8232 	int r;
8233 
8234 	if (!rdev->has_vce)
8235 		return;
8236 
8237 	r = radeon_vce_init(rdev);
8238 	if (r) {
8239 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8240 		/*
8241 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
8242 		 * cik_vce_start() fail early, so nothing happens there.
8243 		 * It is therefore pointless to go through that code, which
8244 		 * is why we disable vce here.
8245 		 */
8246 		rdev->has_vce = 0;
8247 		return;
8248 	}
8249 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8250 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8251 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8252 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8253 }
8254 
8255 static void cik_vce_start(struct radeon_device *rdev)
8256 {
8257 	int r;
8258 
8259 	if (!rdev->has_vce)
8260 		return;
8261 
8262 	r = radeon_vce_resume(rdev);
8263 	if (r) {
8264 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8265 		goto error;
8266 	}
8267 	r = vce_v2_0_resume(rdev);
8268 	if (r) {
8269 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8270 		goto error;
8271 	}
8272 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8273 	if (r) {
8274 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8275 		goto error;
8276 	}
8277 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8278 	if (r) {
8279 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8280 		goto error;
8281 	}
8282 	return;
8283 
8284 error:
8285 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8286 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8287 }
8288 
8289 static void cik_vce_resume(struct radeon_device *rdev)
8290 {
8291 	struct radeon_ring *ring;
8292 	int r;
8293 
8294 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8295 		return;
8296 
8297 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8298 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8299 	if (r) {
8300 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8301 		return;
8302 	}
8303 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8304 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8305 	if (r) {
8306 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8307 		return;
8308 	}
8309 	r = vce_v1_0_init(rdev);
8310 	if (r) {
8311 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8312 		return;
8313 	}
8314 }
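
/*
 * Note on the init/start/resume split above (a summary of the code, not
 * of the hardware docs): the *_init() helpers allocate software state
 * and size the rings, the *_start() helpers reload firmware and fence
 * state on every cik_startup(), and the *_resume() helpers bring the
 * rings themselves up. On a *_start() failure only ring_size is zeroed,
 * which turns the matching *_resume() into a no-op instead of letting
 * it touch a half-initialized block.
 */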
8315 
8316 /**
8317  * cik_startup - program the asic to a functional state
8318  *
8319  * @rdev: radeon_device pointer
8320  *
8321  * Programs the asic to a functional state (CIK).
8322  * Called by cik_init() and cik_resume().
8323  * Returns 0 for success, error for failure.
8324  */
8325 static int cik_startup(struct radeon_device *rdev)
8326 {
8327 	struct radeon_ring *ring;
8328 	u32 nop;
8329 	int r;
8330 
8331 	/* enable pcie gen2/3 link */
8332 	cik_pcie_gen3_enable(rdev);
8333 	/* enable aspm */
8334 	cik_program_aspm(rdev);
8335 
8336 	/* scratch needs to be initialized before MC */
8337 	r = r600_vram_scratch_init(rdev);
8338 	if (r)
8339 		return r;
8340 
8341 	cik_mc_program(rdev);
8342 
8343 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8344 		r = ci_mc_load_microcode(rdev);
8345 		if (r) {
8346 			DRM_ERROR("Failed to load MC firmware!\n");
8347 			return r;
8348 		}
8349 	}
8350 
8351 	r = cik_pcie_gart_enable(rdev);
8352 	if (r)
8353 		return r;
8354 	cik_gpu_init(rdev);
8355 
8356 	/* allocate rlc buffers */
8357 	if (rdev->flags & RADEON_IS_IGP) {
8358 		if (rdev->family == CHIP_KAVERI) {
8359 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8360 			rdev->rlc.reg_list_size =
8361 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8362 		} else {
8363 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8364 			rdev->rlc.reg_list_size =
8365 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8366 		}
8367 	}
8368 	rdev->rlc.cs_data = ci_cs_data;
8369 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8370 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
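	/*
	 * The usual reading of the sizing above (an assumption, not from
	 * the docs): one CP_ME_TABLE_SIZE jump table per CP block -- PFP,
	 * ME, CE and the two MEC pipes make five -- aligned to 2K, plus
	 * 64K reserved for the GDS backup.
	 */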
8371 	r = sumo_rlc_init(rdev);
8372 	if (r) {
8373 		DRM_ERROR("Failed to init rlc BOs!\n");
8374 		return r;
8375 	}
8376 
8377 	/* allocate wb buffer */
8378 	r = radeon_wb_init(rdev);
8379 	if (r)
8380 		return r;
8381 
8382 	/* allocate mec buffers */
8383 	r = cik_mec_init(rdev);
8384 	if (r) {
8385 		DRM_ERROR("Failed to init MEC BOs!\n");
8386 		return r;
8387 	}
8388 
8389 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8390 	if (r) {
8391 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8392 		return r;
8393 	}
8394 
8395 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8396 	if (r) {
8397 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8398 		return r;
8399 	}
8400 
8401 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8402 	if (r) {
8403 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8404 		return r;
8405 	}
8406 
8407 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8408 	if (r) {
8409 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8410 		return r;
8411 	}
8412 
8413 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8414 	if (r) {
8415 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8416 		return r;
8417 	}
8418 
8419 	cik_uvd_start(rdev);
8420 	cik_vce_start(rdev);
8421 
8422 	/* Enable IRQ */
8423 	if (!rdev->irq.installed) {
8424 		r = radeon_irq_kms_init(rdev);
8425 		if (r)
8426 			return r;
8427 	}
8428 
8429 	r = cik_irq_init(rdev);
8430 	if (r) {
8431 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8432 		radeon_irq_kms_fini(rdev);
8433 		return r;
8434 	}
8435 	cik_irq_set(rdev);
8436 
8437 	if (rdev->family == CHIP_HAWAII) {
8438 		if (rdev->new_fw)
8439 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8440 		else
8441 			nop = RADEON_CP_PACKET2;
8442 	} else {
8443 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8444 	}
8445 
8446 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8447 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8448 			     nop);
8449 	if (r)
8450 		return r;
8451 
8452 	/* set up the compute queues */
8453 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8454 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8455 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8456 			     nop);
8457 	if (r)
8458 		return r;
8459 	ring->me = 1; /* first MEC */
8460 	ring->pipe = 0; /* first pipe */
8461 	ring->queue = 0; /* first queue */
8462 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8463 
8464 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8465 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8466 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8467 			     nop);
8468 	if (r)
8469 		return r;
8470 	/* dGPUs only have 1 MEC */
8471 	ring->me = 1; /* first MEC */
8472 	ring->pipe = 0; /* first pipe */
8473 	ring->queue = 1; /* second queue */
8474 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8475 
8476 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8477 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8478 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8479 	if (r)
8480 		return r;
8481 
8482 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8483 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8484 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8485 	if (r)
8486 		return r;
8487 
8488 	r = cik_cp_resume(rdev);
8489 	if (r)
8490 		return r;
8491 
8492 	r = cik_sdma_resume(rdev);
8493 	if (r)
8494 		return r;
8495 
8496 	cik_uvd_resume(rdev);
8497 	cik_vce_resume(rdev);
8498 
8499 	r = radeon_ib_pool_init(rdev);
8500 	if (r) {
8501 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8502 		return r;
8503 	}
8504 
8505 	r = radeon_vm_manager_init(rdev);
8506 	if (r) {
8507 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8508 		return r;
8509 	}
8510 
8511 	r = radeon_audio_init(rdev);
8512 	if (r)
8513 		return r;
8514 
8515 	r = radeon_kfd_resume(rdev);
8516 	if (r)
8517 		return r;
8518 
8519 	return 0;
8520 }
8521 
8522 /**
8523  * cik_resume - resume the asic to a functional state
8524  *
8525  * @rdev: radeon_device pointer
8526  *
8527  * Programs the asic to a functional state (CIK).
8528  * Called at resume.
8529  * Returns 0 for success, error for failure.
8530  */
8531 int cik_resume(struct radeon_device *rdev)
8532 {
8533 	int r;
8534 
8535 	/* post card */
8536 	atom_asic_init(rdev->mode_info.atom_context);
8537 
8538 	/* init golden registers */
8539 	cik_init_golden_registers(rdev);
8540 
8541 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8542 		radeon_pm_resume(rdev);
8543 
8544 	rdev->accel_working = true;
8545 	r = cik_startup(rdev);
8546 	if (r) {
8547 		DRM_ERROR("cik startup failed on resume\n");
8548 		rdev->accel_working = false;
8549 		return r;
8550 	}
8551 
8552 	return r;
8553 
8554 }
8555 
8556 /**
8557  * cik_suspend - suspend the asic
8558  *
8559  * @rdev: radeon_device pointer
8560  *
8561  * Bring the chip into a state suitable for suspend (CIK).
8562  * Called at suspend.
8563  * Returns 0 for success.
8564  */
8565 int cik_suspend(struct radeon_device *rdev)
8566 {
8567 	radeon_kfd_suspend(rdev);
8568 	radeon_pm_suspend(rdev);
8569 	radeon_audio_fini(rdev);
8570 	radeon_vm_manager_fini(rdev);
8571 	cik_cp_enable(rdev, false);
8572 	cik_sdma_enable(rdev, false);
8573 	if (rdev->has_uvd) {
8574 		uvd_v1_0_fini(rdev);
8575 		radeon_uvd_suspend(rdev);
8576 	}
8577 	if (rdev->has_vce)
8578 		radeon_vce_suspend(rdev);
8579 	cik_fini_pg(rdev);
8580 	cik_fini_cg(rdev);
8581 	cik_irq_suspend(rdev);
8582 	radeon_wb_disable(rdev);
8583 	cik_pcie_gart_disable(rdev);
8584 	return 0;
8585 }
8586 
8587 /* The plan is to move initialization into this function and use
8588  * helper functions so that radeon_device_init does pretty much
8589  * nothing more than call asic specific functions. This should
8590  * also allow us to remove a bunch of callback functions like
8591  * vram_info.
8592  */
8593 /**
8594  * cik_init - asic specific driver and hw init
8595  *
8596  * @rdev: radeon_device pointer
8597  *
8598  * Setup asic specific driver variables and program the hw
8599  * to a functional state (CIK).
8600  * Called at driver startup.
8601  * Returns 0 for success, errors for failure.
8602  */
8603 int cik_init(struct radeon_device *rdev)
8604 {
8605 	struct radeon_ring *ring;
8606 	int r;
8607 
8608 	/* Read BIOS */
8609 	if (!radeon_get_bios(rdev)) {
8610 		if (ASIC_IS_AVIVO(rdev))
8611 			return -EINVAL;
8612 	}
8613 	/* Must be an ATOMBIOS */
8614 	if (!rdev->is_atom_bios) {
8615 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8616 		return -EINVAL;
8617 	}
8618 	r = radeon_atombios_init(rdev);
8619 	if (r)
8620 		return r;
8621 
8622 	/* Post card if necessary */
8623 	if (!radeon_card_posted(rdev)) {
8624 		if (!rdev->bios) {
8625 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8626 			return -EINVAL;
8627 		}
8628 		DRM_INFO("GPU not posted. posting now...\n");
8629 		atom_asic_init(rdev->mode_info.atom_context);
8630 	}
8631 	/* init golden registers */
8632 	cik_init_golden_registers(rdev);
8633 	/* Initialize scratch registers */
8634 	cik_scratch_init(rdev);
8635 	/* Initialize surface registers */
8636 	radeon_surface_init(rdev);
8637 	/* Initialize clocks */
8638 	radeon_get_clock_info(rdev->ddev);
8639 
8640 	/* Fence driver */
8641 	r = radeon_fence_driver_init(rdev);
8642 	if (r)
8643 		return r;
8644 
8645 	/* initialize memory controller */
8646 	r = cik_mc_init(rdev);
8647 	if (r)
8648 		return r;
8649 	/* Memory manager */
8650 	r = radeon_bo_init(rdev);
8651 	if (r)
8652 		return r;
8653 
8654 	if (rdev->flags & RADEON_IS_IGP) {
8655 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8656 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8657 			r = cik_init_microcode(rdev);
8658 			if (r) {
8659 				DRM_ERROR("Failed to load firmware!\n");
8660 				return r;
8661 			}
8662 		}
8663 	} else {
8664 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8665 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8666 		    !rdev->mc_fw) {
8667 			r = cik_init_microcode(rdev);
8668 			if (r) {
8669 				DRM_ERROR("Failed to load firmware!\n");
8670 				return r;
8671 			}
8672 		}
8673 	}
8674 
8675 	/* Initialize power management */
8676 	radeon_pm_init(rdev);
8677 
8678 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8679 	ring->ring_obj = NULL;
8680 	r600_ring_init(rdev, ring, 1024 * 1024);
8681 
8682 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8683 	ring->ring_obj = NULL;
8684 	r600_ring_init(rdev, ring, 1024 * 1024);
8685 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8686 	if (r)
8687 		return r;
8688 
8689 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8690 	ring->ring_obj = NULL;
8691 	r600_ring_init(rdev, ring, 1024 * 1024);
8692 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8693 	if (r)
8694 		return r;
8695 
8696 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8697 	ring->ring_obj = NULL;
8698 	r600_ring_init(rdev, ring, 256 * 1024);
8699 
8700 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8701 	ring->ring_obj = NULL;
8702 	r600_ring_init(rdev, ring, 256 * 1024);
8703 
8704 	cik_uvd_init(rdev);
8705 	cik_vce_init(rdev);
8706 
8707 	rdev->ih.ring_obj = NULL;
8708 	r600_ih_ring_init(rdev, 64 * 1024);
8709 
8710 	r = r600_pcie_gart_init(rdev);
8711 	if (r)
8712 		return r;
8713 
8714 	rdev->accel_working = true;
8715 	r = cik_startup(rdev);
8716 	if (r) {
8717 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8718 		cik_cp_fini(rdev);
8719 		cik_sdma_fini(rdev);
8720 		cik_irq_fini(rdev);
8721 		sumo_rlc_fini(rdev);
8722 		cik_mec_fini(rdev);
8723 		radeon_wb_fini(rdev);
8724 		radeon_ib_pool_fini(rdev);
8725 		radeon_vm_manager_fini(rdev);
8726 		radeon_irq_kms_fini(rdev);
8727 		cik_pcie_gart_fini(rdev);
8728 		rdev->accel_working = false;
8729 	}
8730 
8731 	/* Don't start up if the MC ucode is missing.
8732 	 * The default clocks and voltages before the MC ucode
8733 	 * is loaded are not sufficient for advanced operations.
8734 	 */
8735 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8736 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8737 		return -EINVAL;
8738 	}
8739 
8740 	return 0;
8741 }
8742 
8743 /**
8744  * cik_fini - asic specific driver and hw fini
8745  *
8746  * @rdev: radeon_device pointer
8747  *
8748  * Tear down the asic specific driver variables and program the hw
8749  * to an idle state (CIK).
8750  * Called at driver unload.
8751  */
8752 void cik_fini(struct radeon_device *rdev)
8753 {
8754 	radeon_pm_fini(rdev);
8755 	cik_cp_fini(rdev);
8756 	cik_sdma_fini(rdev);
8757 	cik_fini_pg(rdev);
8758 	cik_fini_cg(rdev);
8759 	cik_irq_fini(rdev);
8760 	sumo_rlc_fini(rdev);
8761 	cik_mec_fini(rdev);
8762 	radeon_wb_fini(rdev);
8763 	radeon_vm_manager_fini(rdev);
8764 	radeon_ib_pool_fini(rdev);
8765 	radeon_irq_kms_fini(rdev);
8766 	uvd_v1_0_fini(rdev);
8767 	radeon_uvd_fini(rdev);
8768 	radeon_vce_fini(rdev);
8769 	cik_pcie_gart_fini(rdev);
8770 	r600_vram_scratch_fini(rdev);
8771 	radeon_gem_fini(rdev);
8772 	radeon_fence_driver_fini(rdev);
8773 	radeon_bo_fini(rdev);
8774 	radeon_atombios_fini(rdev);
8775 	kfree(rdev->bios);
8776 	rdev->bios = NULL;
8777 }
8778 
8779 void dce8_program_fmt(struct drm_encoder *encoder)
8780 {
8781 	struct drm_device *dev = encoder->dev;
8782 	struct radeon_device *rdev = dev->dev_private;
8783 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8784 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8785 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8786 	int bpc = 0;
8787 	u32 tmp = 0;
8788 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8789 
8790 	if (connector) {
8791 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8792 		bpc = radeon_get_monitor_bpc(connector);
8793 		dither = radeon_connector->dither;
8794 	}
8795 
8796 	/* LVDS/eDP FMT is set up by atom */
8797 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8798 		return;
8799 
8800 	/* not needed for analog */
8801 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8802 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8803 		return;
8804 
8805 	if (bpc == 0)
8806 		return;
8807 
8808 	switch (bpc) {
8809 	case 6:
8810 		if (dither == RADEON_FMT_DITHER_ENABLE)
8811 			/* XXX sort out optimal dither settings */
8812 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8813 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8814 		else
8815 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8816 		break;
8817 	case 8:
8818 		if (dither == RADEON_FMT_DITHER_ENABLE)
8819 			/* XXX sort out optimal dither settings */
8820 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8821 				FMT_RGB_RANDOM_ENABLE |
8822 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8823 		else
8824 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8825 		break;
8826 	case 10:
8827 		if (dither == RADEON_FMT_DITHER_ENABLE)
8828 			/* XXX sort out optimal dither settings */
8829 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8830 				FMT_RGB_RANDOM_ENABLE |
8831 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8832 		else
8833 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8834 		break;
8835 	default:
8836 		/* not needed */
8837 		break;
8838 	}
8839 
8840 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8841 }
8842 
8843 /* display watermark setup */
8844 /**
8845  * dce8_line_buffer_adjust - Set up the line buffer
8846  *
8847  * @rdev: radeon_device pointer
8848  * @radeon_crtc: the selected display controller
8849  * @mode: the current display mode on the selected display
8850  * controller
8851  *
8852  * Set up the line buffer allocation for
8853  * the selected display controller (CIK).
8854  * Returns the line buffer size in pixels.
8855  */
8856 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8857 				   struct radeon_crtc *radeon_crtc,
8858 				   struct drm_display_mode *mode)
8859 {
8860 	u32 tmp, buffer_alloc, i;
8861 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8862 	/*
8863 	 * Line Buffer Setup
8864 	 * There are 6 line buffers, one for each display controller.
8865 	 * There are 3 partitions per LB. Select the number of partitions
8866 	 * to enable based on the display width.  For display widths larger
8867 	 * than 4096, you need to use 2 display controllers and combine
8868 	 * them using the stereo blender.
8869 	 */
8870 	if (radeon_crtc->base.enabled && mode) {
8871 		if (mode->crtc_hdisplay < 1920) {
8872 			tmp = 1;
8873 			buffer_alloc = 2;
8874 		} else if (mode->crtc_hdisplay < 2560) {
8875 			tmp = 2;
8876 			buffer_alloc = 2;
8877 		} else if (mode->crtc_hdisplay < 4096) {
8878 			tmp = 0;
8879 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8880 		} else {
8881 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8882 			tmp = 0;
8883 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8884 		}
8885 	} else {
8886 		tmp = 1;
8887 		buffer_alloc = 0;
8888 	}
8889 
8890 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8891 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8892 
8893 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8894 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8895 	for (i = 0; i < rdev->usec_timeout; i++) {
8896 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8897 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8898 			break;
8899 		udelay(1);
8900 	}
8901 
8902 	if (radeon_crtc->base.enabled && mode) {
8903 		switch (tmp) {
8904 		case 0:
8905 		default:
8906 			return 4096 * 2;
8907 		case 1:
8908 			return 1920 * 2;
8909 		case 2:
8910 			return 2560 * 2;
8911 		}
8912 	}
8913 
8914 	/* controller not enabled, so no lb used */
8915 	return 0;
8916 }
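
/*
 * Worked example (numbers derived from the code above): a 1920 pixel
 * wide mode is not < 1920 but is < 2560, so tmp = 2 and
 * buffer_alloc = 2; LB_MEMORY_CONFIG(2) is programmed and the function
 * returns 2560 * 2 = 5120 pixels of line buffer for that controller.
 */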
8917 
8918 /**
8919  * cik_get_number_of_dram_channels - get the number of dram channels
8920  *
8921  * @rdev: radeon_device pointer
8922  *
8923  * Look up the number of video ram channels (CIK).
8924  * Used for display watermark bandwidth calculations
8925  * Returns the number of dram channels
8926  */
8927 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8928 {
8929 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8930 
8931 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8932 	case 0:
8933 	default:
8934 		return 1;
8935 	case 1:
8936 		return 2;
8937 	case 2:
8938 		return 4;
8939 	case 3:
8940 		return 8;
8941 	case 4:
8942 		return 3;
8943 	case 5:
8944 		return 6;
8945 	case 6:
8946 		return 10;
8947 	case 7:
8948 		return 12;
8949 	case 8:
8950 		return 16;
8951 	}
8952 }
8953 
8954 struct dce8_wm_params {
8955 	u32 dram_channels; /* number of dram channels */
8956 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8957 	u32 sclk;          /* engine clock in kHz */
8958 	u32 disp_clk;      /* display clock in kHz */
8959 	u32 src_width;     /* viewport width */
8960 	u32 active_time;   /* active display time in ns */
8961 	u32 blank_time;    /* blank time in ns */
8962 	bool interlaced;    /* mode is interlaced */
8963 	fixed20_12 vsc;    /* vertical scale ratio */
8964 	u32 num_heads;     /* number of active crtcs */
8965 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8966 	u32 lb_size;       /* line buffer allocated to pipe */
8967 	u32 vtaps;         /* vertical scaler taps */
8968 };
8969 
8970 /**
8971  * dce8_dram_bandwidth - get the dram bandwidth
8972  *
8973  * @wm: watermark calculation data
8974  *
8975  * Calculate the raw dram bandwidth (CIK).
8976  * Used for display watermark bandwidth calculations
8977  * Returns the dram bandwidth in MBytes/s
8978  */
8979 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8980 {
8981 	/* Calculate raw DRAM Bandwidth */
8982 	fixed20_12 dram_efficiency; /* 0.7 */
8983 	fixed20_12 yclk, dram_channels, bandwidth;
8984 	fixed20_12 a;
8985 
8986 	a.full = dfixed_const(1000);
8987 	yclk.full = dfixed_const(wm->yclk);
8988 	yclk.full = dfixed_div(yclk, a);
8989 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8990 	a.full = dfixed_const(10);
8991 	dram_efficiency.full = dfixed_const(7);
8992 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8993 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8994 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8995 
8996 	return dfixed_trunc(bandwidth);
8997 }
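
/*
 * Worked example with illustrative numbers: yclk = 1000000 kHz
 * (1 GHz per pin) and 4 DRAM channels give
 * (4 channels * 4 bytes) * 1000 MHz * 0.7 efficiency = 11200 MBytes/s.
 */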
8998 
8999 /**
9000  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9001  *
9002  * @wm: watermark calculation data
9003  *
9004  * Calculate the dram bandwidth used for display (CIK).
9005  * Used for display watermark bandwidth calculations
9006  * Returns the dram bandwidth for display in MBytes/s
9007  */
9008 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9009 {
9010 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9011 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9012 	fixed20_12 yclk, dram_channels, bandwidth;
9013 	fixed20_12 a;
9014 
9015 	a.full = dfixed_const(1000);
9016 	yclk.full = dfixed_const(wm->yclk);
9017 	yclk.full = dfixed_div(yclk, a);
9018 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9019 	a.full = dfixed_const(10);
9020 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9021 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9022 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9023 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9024 
9025 	return dfixed_trunc(bandwidth);
9026 }
9027 
9028 /**
9029  * dce8_data_return_bandwidth - get the data return bandwidth
9030  *
9031  * @wm: watermark calculation data
9032  *
9033  * Calculate the data return bandwidth used for display (CIK).
9034  * Used for display watermark bandwidth calculations
9035  * Returns the data return bandwidth in MBytes/s
9036  */
9037 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9038 {
9039 	/* Calculate the display Data return Bandwidth */
9040 	fixed20_12 return_efficiency; /* 0.8 */
9041 	fixed20_12 sclk, bandwidth;
9042 	fixed20_12 a;
9043 
9044 	a.full = dfixed_const(1000);
9045 	sclk.full = dfixed_const(wm->sclk);
9046 	sclk.full = dfixed_div(sclk, a);
9047 	a.full = dfixed_const(10);
9048 	return_efficiency.full = dfixed_const(8);
9049 	return_efficiency.full = dfixed_div(return_efficiency, a);
9050 	a.full = dfixed_const(32);
9051 	bandwidth.full = dfixed_mul(a, sclk);
9052 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9053 
9054 	return dfixed_trunc(bandwidth);
9055 }
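
/*
 * Worked example with illustrative numbers: sclk = 800000 kHz
 * (800 MHz) gives 32 bytes/clk * 800 MHz * 0.8 efficiency
 * = 20480 MBytes/s.
 */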
9056 
9057 /**
9058  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9059  *
9060  * @wm: watermark calculation data
9061  *
9062  * Calculate the dmif bandwidth used for display (CIK).
9063  * Used for display watermark bandwidth calculations
9064  * Returns the dmif bandwidth in MBytes/s
9065  */
9066 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9067 {
9068 	/* Calculate the DMIF Request Bandwidth */
9069 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9070 	fixed20_12 disp_clk, bandwidth;
9071 	fixed20_12 a, b;
9072 
9073 	a.full = dfixed_const(1000);
9074 	disp_clk.full = dfixed_const(wm->disp_clk);
9075 	disp_clk.full = dfixed_div(disp_clk, a);
9076 	a.full = dfixed_const(32);
9077 	b.full = dfixed_mul(a, disp_clk);
9078 
9079 	a.full = dfixed_const(10);
9080 	disp_clk_request_efficiency.full = dfixed_const(8);
9081 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9082 
9083 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9084 
9085 	return dfixed_trunc(bandwidth);
9086 }
9087 
9088 /**
9089  * dce8_available_bandwidth - get the min available bandwidth
9090  *
9091  * @wm: watermark calculation data
9092  *
9093  * Calculate the min available bandwidth used for display (CIK).
9094  * Used for display watermark bandwidth calculations
9095  * Returns the min available bandwidth in MBytes/s
9096  */
9097 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9098 {
9099 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9100 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9101 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9102 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9103 
9104 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9105 }
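
/*
 * Continuing the illustrative numbers above: with 11200 MBytes/s of raw
 * DRAM bandwidth, 20480 MBytes/s of data return bandwidth and, for a
 * 300 MHz disp_clk, 32 * 300 * 0.8 = 7680 MBytes/s of DMIF request
 * bandwidth, the available bandwidth is the minimum of the three:
 * 7680 MBytes/s.
 */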
9106 
9107 /**
9108  * dce8_average_bandwidth - get the average available bandwidth
9109  *
9110  * @wm: watermark calculation data
9111  *
9112  * Calculate the average available bandwidth used for display (CIK).
9113  * Used for display watermark bandwidth calculations
9114  * Returns the average available bandwidth in MBytes/s
9115  */
9116 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9117 {
9118 	/* Calculate the display mode Average Bandwidth.
9119 	 * DisplayMode should contain the source and destination dimensions,
9120 	 * timing, etc.
9121 	 */
9122 	fixed20_12 bpp;
9123 	fixed20_12 line_time;
9124 	fixed20_12 src_width;
9125 	fixed20_12 bandwidth;
9126 	fixed20_12 a;
9127 
9128 	a.full = dfixed_const(1000);
9129 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9130 	line_time.full = dfixed_div(line_time, a);
9131 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9132 	src_width.full = dfixed_const(wm->src_width);
9133 	bandwidth.full = dfixed_mul(src_width, bpp);
9134 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9135 	bandwidth.full = dfixed_div(bandwidth, line_time);
9136 
9137 	return dfixed_trunc(bandwidth);
9138 }
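
/*
 * Worked example with illustrative numbers: src_width = 1920,
 * 4 bytes per pixel, vsc = 1.0 and a 13200 ns line time give
 * (1920 * 4 bytes) / 13.2 us ~= 581 MBytes/s of average bandwidth.
 */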
9139 
9140 /**
9141  * dce8_latency_watermark - get the latency watermark
9142  *
9143  * @wm: watermark calculation data
9144  *
9145  * Calculate the latency watermark (CIK).
9146  * Used for display watermark bandwidth calculations
9147  * Returns the latency watermark in ns
9148  */
9149 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9150 {
9151 	/* First calculate the latency in ns */
9152 	u32 mc_latency = 2000; /* 2000 ns. */
9153 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9154 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9155 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9156 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9157 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9158 		(wm->num_heads * cursor_line_pair_return_time);
9159 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9160 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9161 	u32 tmp, dmif_size = 12288;
9162 	fixed20_12 a, b, c;
9163 
9164 	if (wm->num_heads == 0)
9165 		return 0;
9166 
9167 	a.full = dfixed_const(2);
9168 	b.full = dfixed_const(1);
9169 	if ((wm->vsc.full > a.full) ||
9170 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9171 	    (wm->vtaps >= 5) ||
9172 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9173 		max_src_lines_per_dst_line = 4;
9174 	else
9175 		max_src_lines_per_dst_line = 2;
9176 
9177 	a.full = dfixed_const(available_bandwidth);
9178 	b.full = dfixed_const(wm->num_heads);
9179 	a.full = dfixed_div(a, b);
9180 
9181 	b.full = dfixed_const(mc_latency + 512);
9182 	c.full = dfixed_const(wm->disp_clk);
9183 	b.full = dfixed_div(b, c);
9184 
9185 	c.full = dfixed_const(dmif_size);
9186 	b.full = dfixed_div(c, b);
9187 
9188 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9189 
9190 	b.full = dfixed_const(1000);
9191 	c.full = dfixed_const(wm->disp_clk);
9192 	b.full = dfixed_div(c, b);
9193 	c.full = dfixed_const(wm->bytes_per_pixel);
9194 	b.full = dfixed_mul(b, c);
9195 
9196 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9197 
9198 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9199 	b.full = dfixed_const(1000);
9200 	c.full = dfixed_const(lb_fill_bw);
9201 	b.full = dfixed_div(c, b);
9202 	a.full = dfixed_div(a, b);
9203 	line_fill_time = dfixed_trunc(a);
9204 
9205 	if (line_fill_time < wm->active_time)
9206 		return latency;
9207 	else
9208 		return latency + (line_fill_time - wm->active_time);
9209 
9210 }
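
/*
 * Restating the calculation above: the watermark is the MC latency,
 * plus the time the other heads may spend returning chunk and cursor
 * line data, plus the DC pipe latency; if the line buffer fill rate is
 * too low to refill a line within the active display time, the
 * shortfall is added on top.
 */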
9211 
9212 /**
9213  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9214  * average and available dram bandwidth
9215  *
9216  * @wm: watermark calculation data
9217  *
9218  * Check if the display average bandwidth fits in the display
9219  * dram bandwidth (CIK).
9220  * Used for display watermark bandwidth calculations
9221  * Returns true if the display fits, false if not.
9222  */
9223 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9224 {
9225 	if (dce8_average_bandwidth(wm) <=
9226 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9227 		return true;
9228 	else
9229 		return false;
9230 }
9231 
9232 /**
9233  * dce8_average_bandwidth_vs_available_bandwidth - check
9234  * average and available bandwidth
9235  *
9236  * @wm: watermark calculation data
9237  *
9238  * Check if the display average bandwidth fits in the display
9239  * available bandwidth (CIK).
9240  * Used for display watermark bandwidth calculations
9241  * Returns true if the display fits, false if not.
9242  */
9243 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9244 {
9245 	if (dce8_average_bandwidth(wm) <=
9246 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9247 		return true;
9248 	else
9249 		return false;
9250 }
9251 
9252 /**
9253  * dce8_check_latency_hiding - check latency hiding
9254  *
9255  * @wm: watermark calculation data
9256  *
9257  * Check latency hiding (CIK).
9258  * Used for display watermark bandwidth calculations
9259  * Returns true if the display fits, false if not.
9260  */
9261 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9262 {
9263 	u32 lb_partitions = wm->lb_size / wm->src_width;
9264 	u32 line_time = wm->active_time + wm->blank_time;
9265 	u32 latency_tolerant_lines;
9266 	u32 latency_hiding;
9267 	fixed20_12 a;
9268 
9269 	a.full = dfixed_const(1);
9270 	if (wm->vsc.full > a.full)
9271 		latency_tolerant_lines = 1;
9272 	else {
9273 		if (lb_partitions <= (wm->vtaps + 1))
9274 			latency_tolerant_lines = 1;
9275 		else
9276 			latency_tolerant_lines = 2;
9277 	}
9278 
9279 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9280 
9281 	if (dce8_latency_watermark(wm) <= latency_hiding)
9282 		return true;
9283 	else
9284 		return false;
9285 }
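
/*
 * Example (following the logic above): with no vertical scaling
 * (vsc <= 1.0), vtaps = 1 and a line buffer holding more than two
 * source lines, two lines are latency tolerant, so up to two line
 * times plus the blank time are available to hide the watermark
 * latency.
 */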
9286 
9287 /**
9288  * dce8_program_watermarks - program display watermarks
9289  *
9290  * @rdev: radeon_device pointer
9291  * @radeon_crtc: the selected display controller
9292  * @lb_size: line buffer size
9293  * @num_heads: number of display controllers in use
9294  *
9295  * Calculate and program the display watermarks for the
9296  * selected display controller (CIK).
9297  */
9298 static void dce8_program_watermarks(struct radeon_device *rdev,
9299 				    struct radeon_crtc *radeon_crtc,
9300 				    u32 lb_size, u32 num_heads)
9301 {
9302 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9303 	struct dce8_wm_params wm_low, wm_high;
9304 	u32 pixel_period;
9305 	u32 line_time = 0;
9306 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9307 	u32 tmp, wm_mask;
9308 
9309 	if (radeon_crtc->base.enabled && num_heads && mode) {
9310 		pixel_period = 1000000 / (u32)mode->clock;
9311 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
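		/*
		 * e.g. a 148500 kHz pixel clock gives a 6 ns pixel
		 * period (integer division), so an htotal of 2200
		 * yields a 13200 ns line time, well under the 65535
		 * register limit.
		 */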
9312 
9313 		/* watermark for high clocks */
9314 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9315 		    rdev->pm.dpm_enabled) {
9316 			wm_high.yclk =
9317 				radeon_dpm_get_mclk(rdev, false) * 10;
9318 			wm_high.sclk =
9319 				radeon_dpm_get_sclk(rdev, false) * 10;
9320 		} else {
9321 			wm_high.yclk = rdev->pm.current_mclk * 10;
9322 			wm_high.sclk = rdev->pm.current_sclk * 10;
9323 		}
9324 
9325 		wm_high.disp_clk = mode->clock;
9326 		wm_high.src_width = mode->crtc_hdisplay;
9327 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9328 		wm_high.blank_time = line_time - wm_high.active_time;
9329 		wm_high.interlaced = false;
9330 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9331 			wm_high.interlaced = true;
9332 		wm_high.vsc = radeon_crtc->vsc;
9333 		wm_high.vtaps = 1;
9334 		if (radeon_crtc->rmx_type != RMX_OFF)
9335 			wm_high.vtaps = 2;
9336 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9337 		wm_high.lb_size = lb_size;
9338 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9339 		wm_high.num_heads = num_heads;
9340 
9341 		/* set for high clocks */
9342 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9343 
9344 		/* possibly force display priority to high */
9345 		/* should really do this at mode validation time... */
9346 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9347 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9348 		    !dce8_check_latency_hiding(&wm_high) ||
9349 		    (rdev->disp_priority == 2)) {
9350 			DRM_DEBUG_KMS("force priority to high\n");
9351 		}
9352 
9353 		/* watermark for low clocks */
9354 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9355 		    rdev->pm.dpm_enabled) {
9356 			wm_low.yclk =
9357 				radeon_dpm_get_mclk(rdev, true) * 10;
9358 			wm_low.sclk =
9359 				radeon_dpm_get_sclk(rdev, true) * 10;
9360 		} else {
9361 			wm_low.yclk = rdev->pm.current_mclk * 10;
9362 			wm_low.sclk = rdev->pm.current_sclk * 10;
9363 		}
9364 
9365 		wm_low.disp_clk = mode->clock;
9366 		wm_low.src_width = mode->crtc_hdisplay;
9367 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9368 		wm_low.blank_time = line_time - wm_low.active_time;
9369 		wm_low.interlaced = false;
9370 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9371 			wm_low.interlaced = true;
9372 		wm_low.vsc = radeon_crtc->vsc;
9373 		wm_low.vtaps = 1;
9374 		if (radeon_crtc->rmx_type != RMX_OFF)
9375 			wm_low.vtaps = 2;
9376 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9377 		wm_low.lb_size = lb_size;
9378 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9379 		wm_low.num_heads = num_heads;
9380 
9381 		/* set for low clocks */
9382 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9383 
9384 		/* possibly force display priority to high */
9385 		/* should really do this at mode validation time... */
9386 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9387 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9388 		    !dce8_check_latency_hiding(&wm_low) ||
9389 		    (rdev->disp_priority == 2)) {
9390 			DRM_DEBUG_KMS("force priority to high\n");
9391 		}
9392 
9393 		/* Save number of lines the linebuffer leads before the scanout */
9394 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9395 	}
9396 
9397 	/* select wm A */
9398 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9399 	tmp = wm_mask;
9400 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9401 	tmp |= LATENCY_WATERMARK_MASK(1);
9402 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9403 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9404 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9405 		LATENCY_HIGH_WATERMARK(line_time)));
9406 	/* select wm B */
9407 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9408 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9409 	tmp |= LATENCY_WATERMARK_MASK(2);
9410 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9411 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9412 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9413 		LATENCY_HIGH_WATERMARK(line_time)));
9414 	/* restore original selection */
9415 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9416 
9417 	/* save values for DPM */
9418 	radeon_crtc->line_time = line_time;
9419 	radeon_crtc->wm_high = latency_watermark_a;
9420 	radeon_crtc->wm_low = latency_watermark_b;
9421 }
9422 
9423 /**
9424  * dce8_bandwidth_update - program display watermarks
9425  *
9426  * @rdev: radeon_device pointer
9427  *
9428  * Calculate and program the display watermarks and line
9429  * buffer allocation (CIK).
9430  */
9431 void dce8_bandwidth_update(struct radeon_device *rdev)
9432 {
9433 	struct drm_display_mode *mode = NULL;
9434 	u32 num_heads = 0, lb_size;
9435 	int i;
9436 
9437 	if (!rdev->mode_info.mode_config_initialized)
9438 		return;
9439 
9440 	radeon_update_display_priority(rdev);
9441 
9442 	for (i = 0; i < rdev->num_crtc; i++) {
9443 		if (rdev->mode_info.crtcs[i]->base.enabled)
9444 			num_heads++;
9445 	}
9446 	for (i = 0; i < rdev->num_crtc; i++) {
9447 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9448 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9449 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9450 	}
9451 }
9452 
9453 /**
9454  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9455  *
9456  * @rdev: radeon_device pointer
9457  *
9458  * Fetches a GPU clock counter snapshot (CIK).
9459  * Returns the 64 bit clock counter snapshot.
9460  */
9461 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9462 {
9463 	uint64_t clock;
9464 
9465 	mutex_lock(&rdev->gpu_clock_mutex);
9466 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9467 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9468 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9469 	mutex_unlock(&rdev->gpu_clock_mutex);
9470 	return clock;
9471 }
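
/*
 * As the register names suggest, the RLC_CAPTURE_GPU_CLOCK_COUNT write
 * above latches the running counter into the LSB/MSB pair, and
 * gpu_clock_mutex keeps a second caller from re-latching between the
 * two 32-bit reads.
 */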
9472 
9473 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9474 			     u32 cntl_reg, u32 status_reg)
9475 {
9476 	int r, i;
9477 	struct atom_clock_dividers dividers;
9478 	uint32_t tmp;
9479 
9480 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9481 					   clock, false, &dividers);
9482 	if (r)
9483 		return r;
9484 
9485 	tmp = RREG32_SMC(cntl_reg);
9486 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9487 	tmp |= dividers.post_divider;
9488 	WREG32_SMC(cntl_reg, tmp);
9489 
9490 	for (i = 0; i < 100; i++) {
9491 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9492 			break;
9493 		mdelay(10);
9494 	}
9495 	if (i == 100)
9496 		return -ETIMEDOUT;
9497 
9498 	return 0;
9499 }
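
/*
 * The helper above polls DCLK_STATUS every 10 ms, giving the new
 * divider roughly a second to take effect before bailing out with
 * -ETIMEDOUT; cik_set_vce_clocks() below uses the same pattern for the
 * ECLK divider.
 */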
9500 
9501 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9502 {
9503 	int r = 0;
9504 
9505 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9506 	if (r)
9507 		return r;
9508 
9509 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9510 	return r;
9511 }
9512 
9513 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9514 {
9515 	int r, i;
9516 	struct atom_clock_dividers dividers;
9517 	u32 tmp;
9518 
9519 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9520 					   ecclk, false, &dividers);
9521 	if (r)
9522 		return r;
9523 
9524 	for (i = 0; i < 100; i++) {
9525 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9526 			break;
9527 		mdelay(10);
9528 	}
9529 	if (i == 100)
9530 		return -ETIMEDOUT;
9531 
9532 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9533 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9534 	tmp |= dividers.post_divider;
9535 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9536 
9537 	for (i = 0; i < 100; i++) {
9538 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9539 			break;
9540 		mdelay(10);
9541 	}
9542 	if (i == 100)
9543 		return -ETIMEDOUT;
9544 
9545 	return 0;
9546 }
9547 
9548 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9549 {
9550 	struct pci_dev *root = rdev->pdev->bus->self;
9551 	int bridge_pos, gpu_pos;
9552 	u32 speed_cntl, mask, current_data_rate;
9553 	int ret, i;
9554 	u16 tmp16;
9555 
9556 	if (pci_is_root_bus(rdev->pdev->bus))
9557 		return;
9558 
9559 	if (radeon_pcie_gen2 == 0)
9560 		return;
9561 
9562 	if (rdev->flags & RADEON_IS_IGP)
9563 		return;
9564 
9565 	if (!(rdev->flags & RADEON_IS_PCIE))
9566 		return;
9567 
9568 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9569 	if (ret != 0)
9570 		return;
9571 
9572 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9573 		return;
9574 
9575 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9576 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9577 		LC_CURRENT_DATA_RATE_SHIFT;
9578 	if (mask & DRM_PCIE_SPEED_80) {
9579 		if (current_data_rate == 2) {
9580 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9581 			return;
9582 		}
9583 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9584 	} else if (mask & DRM_PCIE_SPEED_50) {
9585 		if (current_data_rate == 1) {
9586 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9587 			return;
9588 		}
9589 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9590 	}
9591 
9592 	bridge_pos = pci_pcie_cap(root);
9593 	if (!bridge_pos)
9594 		return;
9595 
9596 	gpu_pos = pci_pcie_cap(rdev->pdev);
9597 	if (!gpu_pos)
9598 		return;
9599 
9600 	if (mask & DRM_PCIE_SPEED_80) {
9601 		/* re-try equalization if gen3 is not already enabled */
9602 		if (current_data_rate != 2) {
9603 			u16 bridge_cfg, gpu_cfg;
9604 			u16 bridge_cfg2, gpu_cfg2;
9605 			u32 max_lw, current_lw, tmp;
9606 
9607 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9608 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9609 
9610 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9611 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9612 
9613 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9614 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9615 
9616 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9617 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9618 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9619 
9620 			if (current_lw < max_lw) {
9621 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9622 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9623 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9624 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9625 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9626 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9627 				}
9628 			}
9629 
9630 			for (i = 0; i < 10; i++) {
9631 				/* check status */
9632 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9633 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9634 					break;
9635 
9636 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9637 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9638 
9639 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9640 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9641 
9642 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9643 				tmp |= LC_SET_QUIESCE;
9644 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9645 
9646 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9647 				tmp |= LC_REDO_EQ;
9648 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9649 
9650 				mdelay(100);
9651 
9652 				/* linkctl */
9653 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9654 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9655 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9656 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9657 
9658 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9659 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9660 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9661 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9662 
9663 				/* linkctl2 */
9664 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9665 				tmp16 &= ~((1 << 4) | (7 << 9));
9666 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9667 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9668 
9669 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9670 				tmp16 &= ~((1 << 4) | (7 << 9));
9671 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9672 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9673 
9674 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9675 				tmp &= ~LC_SET_QUIESCE;
9676 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9677 			}
9678 		}
9679 	}
9680 
9681 	/* set the link speed */
9682 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9683 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9684 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9685 
9686 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9687 	tmp16 &= ~0xf;
9688 	if (mask & DRM_PCIE_SPEED_80)
9689 		tmp16 |= 3; /* gen3 */
9690 	else if (mask & DRM_PCIE_SPEED_50)
9691 		tmp16 |= 2; /* gen2 */
9692 	else
9693 		tmp16 |= 1; /* gen1 */
9694 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9695 
9696 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9697 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9698 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9699 
9700 	for (i = 0; i < rdev->usec_timeout; i++) {
9701 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9702 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9703 			break;
9704 		udelay(1);
9705 	}
9706 }
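
/*
 * Summary of the gen3 bring-up above: if 8.0 GT/s is supported but not
 * yet active, the link is quiesced (LC_SET_QUIESCE) and equalization
 * redone (LC_REDO_EQ) for up to 10 iterations, restoring the saved
 * LNKCTL/LNKCTL2 values on bridge and GPU each pass, before a
 * software-driven speed change is forced and polled to completion.
 */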
9707 
9708 static void cik_program_aspm(struct radeon_device *rdev)
9709 {
9710 	u32 data, orig;
9711 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9712 	bool disable_clkreq = false;
9713 
9714 	if (radeon_aspm == 0)
9715 		return;
9716 
9717 	/* XXX double check IGPs */
9718 	if (rdev->flags & RADEON_IS_IGP)
9719 		return;
9720 
9721 	if (!(rdev->flags & RADEON_IS_PCIE))
9722 		return;
9723 
9724 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9725 	data &= ~LC_XMIT_N_FTS_MASK;
9726 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9727 	if (orig != data)
9728 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9729 
9730 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9731 	data |= LC_GO_TO_RECOVERY;
9732 	if (orig != data)
9733 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9734 
9735 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9736 	data |= P_IGNORE_EDB_ERR;
9737 	if (orig != data)
9738 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9739 
9740 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9741 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9742 	data |= LC_PMI_TO_L1_DIS;
9743 	if (!disable_l0s)
9744 		data |= LC_L0S_INACTIVITY(7);
9745 
9746 	if (!disable_l1) {
9747 		data |= LC_L1_INACTIVITY(7);
9748 		data &= ~LC_PMI_TO_L1_DIS;
9749 		if (orig != data)
9750 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9751 
9752 		if (!disable_plloff_in_l1) {
9753 			bool clk_req_support;
9754 
9755 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9756 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9757 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9758 			if (orig != data)
9759 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9760 
9761 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9762 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9763 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9764 			if (orig != data)
9765 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9766 
9767 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9768 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9769 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9770 			if (orig != data)
9771 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9772 
9773 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9774 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9775 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9776 			if (orig != data)
9777 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9778 
9779 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9780 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9781 			data |= LC_DYN_LANES_PWR_STATE(3);
9782 			if (orig != data)
9783 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9784 
9785 			if (!disable_clkreq &&
9786 			    !pci_is_root_bus(rdev->pdev->bus)) {
9787 				struct pci_dev *root = rdev->pdev->bus->self;
9788 				u32 lnkcap;
9789 
9790 				clk_req_support = false;
9791 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9792 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9793 					clk_req_support = true;
9794 			} else {
9795 				clk_req_support = false;
9796 			}
9797 
9798 			if (clk_req_support) {
9799 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9800 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9801 				if (orig != data)
9802 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9803 
9804 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9805 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9806 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9807 				if (orig != data)
9808 					WREG32_SMC(THM_CLK_CNTL, data);
9809 
9810 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9811 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9812 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9813 				if (orig != data)
9814 					WREG32_SMC(MISC_CLK_CTRL, data);
9815 
9816 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9817 				data &= ~BCLK_AS_XCLK;
9818 				if (orig != data)
9819 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9820 
9821 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9822 				data &= ~FORCE_BIF_REFCLK_EN;
9823 				if (orig != data)
9824 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9825 
9826 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9827 				data &= ~MPLL_CLKOUT_SEL_MASK;
9828 				data |= MPLL_CLKOUT_SEL(4);
9829 				if (orig != data)
9830 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9831 			}
9832 		}
9833 	} else {
9834 		if (orig != data)
9835 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9836 	}
9837 
9838 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9839 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9840 	if (orig != data)
9841 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9842 
9843 	if (!disable_l0s) {
9844 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9845 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9846 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9847 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9848 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9849 				data &= ~LC_L0S_INACTIVITY_MASK;
9850 				if (orig != data)
9851 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9852 			}
9853 		}
9854 	}
9855 }
9856