xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision f7d84fa7)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37 
38 #define SH_MEM_CONFIG_GFX_DEFAULT \
39 	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
40 
41 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
50 
51 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
52 MODULE_FIRMWARE("radeon/bonaire_me.bin");
53 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
54 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
55 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
56 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
57 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
58 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
59 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
60 
61 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
69 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
70 
71 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
72 MODULE_FIRMWARE("radeon/hawaii_me.bin");
73 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
74 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
75 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
76 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
77 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
78 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
79 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
80 
81 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
82 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
83 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
84 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
86 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
87 
88 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
89 MODULE_FIRMWARE("radeon/kaveri_me.bin");
90 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
91 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
92 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
93 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
94 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
95 
96 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
97 MODULE_FIRMWARE("radeon/KABINI_me.bin");
98 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
99 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
100 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
101 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
102 
103 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
104 MODULE_FIRMWARE("radeon/kabini_me.bin");
105 MODULE_FIRMWARE("radeon/kabini_ce.bin");
106 MODULE_FIRMWARE("radeon/kabini_mec.bin");
107 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
108 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
109 
110 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
111 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
112 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
113 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
115 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
116 
117 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
118 MODULE_FIRMWARE("radeon/mullins_me.bin");
119 MODULE_FIRMWARE("radeon/mullins_ce.bin");
120 MODULE_FIRMWARE("radeon/mullins_mec.bin");
121 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
122 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
123 
124 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
125 extern void r600_ih_ring_fini(struct radeon_device *rdev);
126 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
127 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
128 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
129 extern void sumo_rlc_fini(struct radeon_device *rdev);
130 extern int sumo_rlc_init(struct radeon_device *rdev);
131 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
132 extern void si_rlc_reset(struct radeon_device *rdev);
133 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
134 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
135 extern int cik_sdma_resume(struct radeon_device *rdev);
136 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
137 extern void cik_sdma_fini(struct radeon_device *rdev);
138 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
139 static void cik_rlc_stop(struct radeon_device *rdev);
140 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
141 static void cik_program_aspm(struct radeon_device *rdev);
142 static void cik_init_pg(struct radeon_device *rdev);
143 static void cik_init_cg(struct radeon_device *rdev);
144 static void cik_fini_pg(struct radeon_device *rdev);
145 static void cik_fini_cg(struct radeon_device *rdev);
146 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
147 					  bool enable);
148 
149 /**
150  * cik_get_allowed_info_register - fetch the register for the info ioctl
151  *
152  * @rdev: radeon_device pointer
153  * @reg: register offset in bytes
154  * @val: register value
155  *
156  * Returns 0 for success or -EINVAL for an invalid register
157  *
158  */
159 int cik_get_allowed_info_register(struct radeon_device *rdev,
160 				  u32 reg, u32 *val)
161 {
162 	switch (reg) {
163 	case GRBM_STATUS:
164 	case GRBM_STATUS2:
165 	case GRBM_STATUS_SE0:
166 	case GRBM_STATUS_SE1:
167 	case GRBM_STATUS_SE2:
168 	case GRBM_STATUS_SE3:
169 	case SRBM_STATUS:
170 	case SRBM_STATUS2:
171 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
172 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
173 	case UVD_STATUS:
174 	/* TODO VCE */
175 		*val = RREG32(reg);
176 		return 0;
177 	default:
178 		return -EINVAL;
179 	}
180 }
181 
182 /*
183  * Indirect registers accessor
184  */
185 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
186 {
187 	unsigned long flags;
188 	u32 r;
189 
190 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
191 	WREG32(CIK_DIDT_IND_INDEX, (reg));
192 	r = RREG32(CIK_DIDT_IND_DATA);
193 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
194 	return r;
195 }
196 
197 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
198 {
199 	unsigned long flags;
200 
201 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
202 	WREG32(CIK_DIDT_IND_INDEX, (reg));
203 	WREG32(CIK_DIDT_IND_DATA, (v));
204 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
205 }
206 
207 /* get temperature in millidegrees */
208 int ci_get_temp(struct radeon_device *rdev)
209 {
210 	u32 temp;
211 	int actual_temp = 0;
212 
213 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
214 		CTF_TEMP_SHIFT;
215 
216 	if (temp & 0x200)
217 		actual_temp = 255;
218 	else
219 		actual_temp = temp & 0x1ff;
220 
221 	actual_temp = actual_temp * 1000;
222 
223 	return actual_temp;
224 }
225 
226 /* get temperature in millidegrees */
227 int kv_get_temp(struct radeon_device *rdev)
228 {
229 	u32 temp;
230 	int actual_temp = 0;
231 
232 	temp = RREG32_SMC(0xC0300E0C);
233 
234 	if (temp)
235 		actual_temp = (temp / 8) - 49;
236 	else
237 		actual_temp = 0;
238 
239 	actual_temp = actual_temp * 1000;
240 
241 	return actual_temp;
242 }
243 
244 /*
245  * Indirect registers accessor
246  */
247 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
248 {
249 	unsigned long flags;
250 	u32 r;
251 
252 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
253 	WREG32(PCIE_INDEX, reg);
254 	(void)RREG32(PCIE_INDEX);
255 	r = RREG32(PCIE_DATA);
256 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
257 	return r;
258 }
259 
260 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
261 {
262 	unsigned long flags;
263 
264 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
265 	WREG32(PCIE_INDEX, reg);
266 	(void)RREG32(PCIE_INDEX);
267 	WREG32(PCIE_DATA, v);
268 	(void)RREG32(PCIE_DATA);
269 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
270 }
271 
272 static const u32 spectre_rlc_save_restore_register_list[] =
273 {
274 	(0x0e00 << 16) | (0xc12c >> 2),
275 	0x00000000,
276 	(0x0e00 << 16) | (0xc140 >> 2),
277 	0x00000000,
278 	(0x0e00 << 16) | (0xc150 >> 2),
279 	0x00000000,
280 	(0x0e00 << 16) | (0xc15c >> 2),
281 	0x00000000,
282 	(0x0e00 << 16) | (0xc168 >> 2),
283 	0x00000000,
284 	(0x0e00 << 16) | (0xc170 >> 2),
285 	0x00000000,
286 	(0x0e00 << 16) | (0xc178 >> 2),
287 	0x00000000,
288 	(0x0e00 << 16) | (0xc204 >> 2),
289 	0x00000000,
290 	(0x0e00 << 16) | (0xc2b4 >> 2),
291 	0x00000000,
292 	(0x0e00 << 16) | (0xc2b8 >> 2),
293 	0x00000000,
294 	(0x0e00 << 16) | (0xc2bc >> 2),
295 	0x00000000,
296 	(0x0e00 << 16) | (0xc2c0 >> 2),
297 	0x00000000,
298 	(0x0e00 << 16) | (0x8228 >> 2),
299 	0x00000000,
300 	(0x0e00 << 16) | (0x829c >> 2),
301 	0x00000000,
302 	(0x0e00 << 16) | (0x869c >> 2),
303 	0x00000000,
304 	(0x0600 << 16) | (0x98f4 >> 2),
305 	0x00000000,
306 	(0x0e00 << 16) | (0x98f8 >> 2),
307 	0x00000000,
308 	(0x0e00 << 16) | (0x9900 >> 2),
309 	0x00000000,
310 	(0x0e00 << 16) | (0xc260 >> 2),
311 	0x00000000,
312 	(0x0e00 << 16) | (0x90e8 >> 2),
313 	0x00000000,
314 	(0x0e00 << 16) | (0x3c000 >> 2),
315 	0x00000000,
316 	(0x0e00 << 16) | (0x3c00c >> 2),
317 	0x00000000,
318 	(0x0e00 << 16) | (0x8c1c >> 2),
319 	0x00000000,
320 	(0x0e00 << 16) | (0x9700 >> 2),
321 	0x00000000,
322 	(0x0e00 << 16) | (0xcd20 >> 2),
323 	0x00000000,
324 	(0x4e00 << 16) | (0xcd20 >> 2),
325 	0x00000000,
326 	(0x5e00 << 16) | (0xcd20 >> 2),
327 	0x00000000,
328 	(0x6e00 << 16) | (0xcd20 >> 2),
329 	0x00000000,
330 	(0x7e00 << 16) | (0xcd20 >> 2),
331 	0x00000000,
332 	(0x8e00 << 16) | (0xcd20 >> 2),
333 	0x00000000,
334 	(0x9e00 << 16) | (0xcd20 >> 2),
335 	0x00000000,
336 	(0xae00 << 16) | (0xcd20 >> 2),
337 	0x00000000,
338 	(0xbe00 << 16) | (0xcd20 >> 2),
339 	0x00000000,
340 	(0x0e00 << 16) | (0x89bc >> 2),
341 	0x00000000,
342 	(0x0e00 << 16) | (0x8900 >> 2),
343 	0x00000000,
344 	0x3,
345 	(0x0e00 << 16) | (0xc130 >> 2),
346 	0x00000000,
347 	(0x0e00 << 16) | (0xc134 >> 2),
348 	0x00000000,
349 	(0x0e00 << 16) | (0xc1fc >> 2),
350 	0x00000000,
351 	(0x0e00 << 16) | (0xc208 >> 2),
352 	0x00000000,
353 	(0x0e00 << 16) | (0xc264 >> 2),
354 	0x00000000,
355 	(0x0e00 << 16) | (0xc268 >> 2),
356 	0x00000000,
357 	(0x0e00 << 16) | (0xc26c >> 2),
358 	0x00000000,
359 	(0x0e00 << 16) | (0xc270 >> 2),
360 	0x00000000,
361 	(0x0e00 << 16) | (0xc274 >> 2),
362 	0x00000000,
363 	(0x0e00 << 16) | (0xc278 >> 2),
364 	0x00000000,
365 	(0x0e00 << 16) | (0xc27c >> 2),
366 	0x00000000,
367 	(0x0e00 << 16) | (0xc280 >> 2),
368 	0x00000000,
369 	(0x0e00 << 16) | (0xc284 >> 2),
370 	0x00000000,
371 	(0x0e00 << 16) | (0xc288 >> 2),
372 	0x00000000,
373 	(0x0e00 << 16) | (0xc28c >> 2),
374 	0x00000000,
375 	(0x0e00 << 16) | (0xc290 >> 2),
376 	0x00000000,
377 	(0x0e00 << 16) | (0xc294 >> 2),
378 	0x00000000,
379 	(0x0e00 << 16) | (0xc298 >> 2),
380 	0x00000000,
381 	(0x0e00 << 16) | (0xc29c >> 2),
382 	0x00000000,
383 	(0x0e00 << 16) | (0xc2a0 >> 2),
384 	0x00000000,
385 	(0x0e00 << 16) | (0xc2a4 >> 2),
386 	0x00000000,
387 	(0x0e00 << 16) | (0xc2a8 >> 2),
388 	0x00000000,
389 	(0x0e00 << 16) | (0xc2ac  >> 2),
390 	0x00000000,
391 	(0x0e00 << 16) | (0xc2b0 >> 2),
392 	0x00000000,
393 	(0x0e00 << 16) | (0x301d0 >> 2),
394 	0x00000000,
395 	(0x0e00 << 16) | (0x30238 >> 2),
396 	0x00000000,
397 	(0x0e00 << 16) | (0x30250 >> 2),
398 	0x00000000,
399 	(0x0e00 << 16) | (0x30254 >> 2),
400 	0x00000000,
401 	(0x0e00 << 16) | (0x30258 >> 2),
402 	0x00000000,
403 	(0x0e00 << 16) | (0x3025c >> 2),
404 	0x00000000,
405 	(0x4e00 << 16) | (0xc900 >> 2),
406 	0x00000000,
407 	(0x5e00 << 16) | (0xc900 >> 2),
408 	0x00000000,
409 	(0x6e00 << 16) | (0xc900 >> 2),
410 	0x00000000,
411 	(0x7e00 << 16) | (0xc900 >> 2),
412 	0x00000000,
413 	(0x8e00 << 16) | (0xc900 >> 2),
414 	0x00000000,
415 	(0x9e00 << 16) | (0xc900 >> 2),
416 	0x00000000,
417 	(0xae00 << 16) | (0xc900 >> 2),
418 	0x00000000,
419 	(0xbe00 << 16) | (0xc900 >> 2),
420 	0x00000000,
421 	(0x4e00 << 16) | (0xc904 >> 2),
422 	0x00000000,
423 	(0x5e00 << 16) | (0xc904 >> 2),
424 	0x00000000,
425 	(0x6e00 << 16) | (0xc904 >> 2),
426 	0x00000000,
427 	(0x7e00 << 16) | (0xc904 >> 2),
428 	0x00000000,
429 	(0x8e00 << 16) | (0xc904 >> 2),
430 	0x00000000,
431 	(0x9e00 << 16) | (0xc904 >> 2),
432 	0x00000000,
433 	(0xae00 << 16) | (0xc904 >> 2),
434 	0x00000000,
435 	(0xbe00 << 16) | (0xc904 >> 2),
436 	0x00000000,
437 	(0x4e00 << 16) | (0xc908 >> 2),
438 	0x00000000,
439 	(0x5e00 << 16) | (0xc908 >> 2),
440 	0x00000000,
441 	(0x6e00 << 16) | (0xc908 >> 2),
442 	0x00000000,
443 	(0x7e00 << 16) | (0xc908 >> 2),
444 	0x00000000,
445 	(0x8e00 << 16) | (0xc908 >> 2),
446 	0x00000000,
447 	(0x9e00 << 16) | (0xc908 >> 2),
448 	0x00000000,
449 	(0xae00 << 16) | (0xc908 >> 2),
450 	0x00000000,
451 	(0xbe00 << 16) | (0xc908 >> 2),
452 	0x00000000,
453 	(0x4e00 << 16) | (0xc90c >> 2),
454 	0x00000000,
455 	(0x5e00 << 16) | (0xc90c >> 2),
456 	0x00000000,
457 	(0x6e00 << 16) | (0xc90c >> 2),
458 	0x00000000,
459 	(0x7e00 << 16) | (0xc90c >> 2),
460 	0x00000000,
461 	(0x8e00 << 16) | (0xc90c >> 2),
462 	0x00000000,
463 	(0x9e00 << 16) | (0xc90c >> 2),
464 	0x00000000,
465 	(0xae00 << 16) | (0xc90c >> 2),
466 	0x00000000,
467 	(0xbe00 << 16) | (0xc90c >> 2),
468 	0x00000000,
469 	(0x4e00 << 16) | (0xc910 >> 2),
470 	0x00000000,
471 	(0x5e00 << 16) | (0xc910 >> 2),
472 	0x00000000,
473 	(0x6e00 << 16) | (0xc910 >> 2),
474 	0x00000000,
475 	(0x7e00 << 16) | (0xc910 >> 2),
476 	0x00000000,
477 	(0x8e00 << 16) | (0xc910 >> 2),
478 	0x00000000,
479 	(0x9e00 << 16) | (0xc910 >> 2),
480 	0x00000000,
481 	(0xae00 << 16) | (0xc910 >> 2),
482 	0x00000000,
483 	(0xbe00 << 16) | (0xc910 >> 2),
484 	0x00000000,
485 	(0x0e00 << 16) | (0xc99c >> 2),
486 	0x00000000,
487 	(0x0e00 << 16) | (0x9834 >> 2),
488 	0x00000000,
489 	(0x0000 << 16) | (0x30f00 >> 2),
490 	0x00000000,
491 	(0x0001 << 16) | (0x30f00 >> 2),
492 	0x00000000,
493 	(0x0000 << 16) | (0x30f04 >> 2),
494 	0x00000000,
495 	(0x0001 << 16) | (0x30f04 >> 2),
496 	0x00000000,
497 	(0x0000 << 16) | (0x30f08 >> 2),
498 	0x00000000,
499 	(0x0001 << 16) | (0x30f08 >> 2),
500 	0x00000000,
501 	(0x0000 << 16) | (0x30f0c >> 2),
502 	0x00000000,
503 	(0x0001 << 16) | (0x30f0c >> 2),
504 	0x00000000,
505 	(0x0600 << 16) | (0x9b7c >> 2),
506 	0x00000000,
507 	(0x0e00 << 16) | (0x8a14 >> 2),
508 	0x00000000,
509 	(0x0e00 << 16) | (0x8a18 >> 2),
510 	0x00000000,
511 	(0x0600 << 16) | (0x30a00 >> 2),
512 	0x00000000,
513 	(0x0e00 << 16) | (0x8bf0 >> 2),
514 	0x00000000,
515 	(0x0e00 << 16) | (0x8bcc >> 2),
516 	0x00000000,
517 	(0x0e00 << 16) | (0x8b24 >> 2),
518 	0x00000000,
519 	(0x0e00 << 16) | (0x30a04 >> 2),
520 	0x00000000,
521 	(0x0600 << 16) | (0x30a10 >> 2),
522 	0x00000000,
523 	(0x0600 << 16) | (0x30a14 >> 2),
524 	0x00000000,
525 	(0x0600 << 16) | (0x30a18 >> 2),
526 	0x00000000,
527 	(0x0600 << 16) | (0x30a2c >> 2),
528 	0x00000000,
529 	(0x0e00 << 16) | (0xc700 >> 2),
530 	0x00000000,
531 	(0x0e00 << 16) | (0xc704 >> 2),
532 	0x00000000,
533 	(0x0e00 << 16) | (0xc708 >> 2),
534 	0x00000000,
535 	(0x0e00 << 16) | (0xc768 >> 2),
536 	0x00000000,
537 	(0x0400 << 16) | (0xc770 >> 2),
538 	0x00000000,
539 	(0x0400 << 16) | (0xc774 >> 2),
540 	0x00000000,
541 	(0x0400 << 16) | (0xc778 >> 2),
542 	0x00000000,
543 	(0x0400 << 16) | (0xc77c >> 2),
544 	0x00000000,
545 	(0x0400 << 16) | (0xc780 >> 2),
546 	0x00000000,
547 	(0x0400 << 16) | (0xc784 >> 2),
548 	0x00000000,
549 	(0x0400 << 16) | (0xc788 >> 2),
550 	0x00000000,
551 	(0x0400 << 16) | (0xc78c >> 2),
552 	0x00000000,
553 	(0x0400 << 16) | (0xc798 >> 2),
554 	0x00000000,
555 	(0x0400 << 16) | (0xc79c >> 2),
556 	0x00000000,
557 	(0x0400 << 16) | (0xc7a0 >> 2),
558 	0x00000000,
559 	(0x0400 << 16) | (0xc7a4 >> 2),
560 	0x00000000,
561 	(0x0400 << 16) | (0xc7a8 >> 2),
562 	0x00000000,
563 	(0x0400 << 16) | (0xc7ac >> 2),
564 	0x00000000,
565 	(0x0400 << 16) | (0xc7b0 >> 2),
566 	0x00000000,
567 	(0x0400 << 16) | (0xc7b4 >> 2),
568 	0x00000000,
569 	(0x0e00 << 16) | (0x9100 >> 2),
570 	0x00000000,
571 	(0x0e00 << 16) | (0x3c010 >> 2),
572 	0x00000000,
573 	(0x0e00 << 16) | (0x92a8 >> 2),
574 	0x00000000,
575 	(0x0e00 << 16) | (0x92ac >> 2),
576 	0x00000000,
577 	(0x0e00 << 16) | (0x92b4 >> 2),
578 	0x00000000,
579 	(0x0e00 << 16) | (0x92b8 >> 2),
580 	0x00000000,
581 	(0x0e00 << 16) | (0x92bc >> 2),
582 	0x00000000,
583 	(0x0e00 << 16) | (0x92c0 >> 2),
584 	0x00000000,
585 	(0x0e00 << 16) | (0x92c4 >> 2),
586 	0x00000000,
587 	(0x0e00 << 16) | (0x92c8 >> 2),
588 	0x00000000,
589 	(0x0e00 << 16) | (0x92cc >> 2),
590 	0x00000000,
591 	(0x0e00 << 16) | (0x92d0 >> 2),
592 	0x00000000,
593 	(0x0e00 << 16) | (0x8c00 >> 2),
594 	0x00000000,
595 	(0x0e00 << 16) | (0x8c04 >> 2),
596 	0x00000000,
597 	(0x0e00 << 16) | (0x8c20 >> 2),
598 	0x00000000,
599 	(0x0e00 << 16) | (0x8c38 >> 2),
600 	0x00000000,
601 	(0x0e00 << 16) | (0x8c3c >> 2),
602 	0x00000000,
603 	(0x0e00 << 16) | (0xae00 >> 2),
604 	0x00000000,
605 	(0x0e00 << 16) | (0x9604 >> 2),
606 	0x00000000,
607 	(0x0e00 << 16) | (0xac08 >> 2),
608 	0x00000000,
609 	(0x0e00 << 16) | (0xac0c >> 2),
610 	0x00000000,
611 	(0x0e00 << 16) | (0xac10 >> 2),
612 	0x00000000,
613 	(0x0e00 << 16) | (0xac14 >> 2),
614 	0x00000000,
615 	(0x0e00 << 16) | (0xac58 >> 2),
616 	0x00000000,
617 	(0x0e00 << 16) | (0xac68 >> 2),
618 	0x00000000,
619 	(0x0e00 << 16) | (0xac6c >> 2),
620 	0x00000000,
621 	(0x0e00 << 16) | (0xac70 >> 2),
622 	0x00000000,
623 	(0x0e00 << 16) | (0xac74 >> 2),
624 	0x00000000,
625 	(0x0e00 << 16) | (0xac78 >> 2),
626 	0x00000000,
627 	(0x0e00 << 16) | (0xac7c >> 2),
628 	0x00000000,
629 	(0x0e00 << 16) | (0xac80 >> 2),
630 	0x00000000,
631 	(0x0e00 << 16) | (0xac84 >> 2),
632 	0x00000000,
633 	(0x0e00 << 16) | (0xac88 >> 2),
634 	0x00000000,
635 	(0x0e00 << 16) | (0xac8c >> 2),
636 	0x00000000,
637 	(0x0e00 << 16) | (0x970c >> 2),
638 	0x00000000,
639 	(0x0e00 << 16) | (0x9714 >> 2),
640 	0x00000000,
641 	(0x0e00 << 16) | (0x9718 >> 2),
642 	0x00000000,
643 	(0x0e00 << 16) | (0x971c >> 2),
644 	0x00000000,
645 	(0x0e00 << 16) | (0x31068 >> 2),
646 	0x00000000,
647 	(0x4e00 << 16) | (0x31068 >> 2),
648 	0x00000000,
649 	(0x5e00 << 16) | (0x31068 >> 2),
650 	0x00000000,
651 	(0x6e00 << 16) | (0x31068 >> 2),
652 	0x00000000,
653 	(0x7e00 << 16) | (0x31068 >> 2),
654 	0x00000000,
655 	(0x8e00 << 16) | (0x31068 >> 2),
656 	0x00000000,
657 	(0x9e00 << 16) | (0x31068 >> 2),
658 	0x00000000,
659 	(0xae00 << 16) | (0x31068 >> 2),
660 	0x00000000,
661 	(0xbe00 << 16) | (0x31068 >> 2),
662 	0x00000000,
663 	(0x0e00 << 16) | (0xcd10 >> 2),
664 	0x00000000,
665 	(0x0e00 << 16) | (0xcd14 >> 2),
666 	0x00000000,
667 	(0x0e00 << 16) | (0x88b0 >> 2),
668 	0x00000000,
669 	(0x0e00 << 16) | (0x88b4 >> 2),
670 	0x00000000,
671 	(0x0e00 << 16) | (0x88b8 >> 2),
672 	0x00000000,
673 	(0x0e00 << 16) | (0x88bc >> 2),
674 	0x00000000,
675 	(0x0400 << 16) | (0x89c0 >> 2),
676 	0x00000000,
677 	(0x0e00 << 16) | (0x88c4 >> 2),
678 	0x00000000,
679 	(0x0e00 << 16) | (0x88c8 >> 2),
680 	0x00000000,
681 	(0x0e00 << 16) | (0x88d0 >> 2),
682 	0x00000000,
683 	(0x0e00 << 16) | (0x88d4 >> 2),
684 	0x00000000,
685 	(0x0e00 << 16) | (0x88d8 >> 2),
686 	0x00000000,
687 	(0x0e00 << 16) | (0x8980 >> 2),
688 	0x00000000,
689 	(0x0e00 << 16) | (0x30938 >> 2),
690 	0x00000000,
691 	(0x0e00 << 16) | (0x3093c >> 2),
692 	0x00000000,
693 	(0x0e00 << 16) | (0x30940 >> 2),
694 	0x00000000,
695 	(0x0e00 << 16) | (0x89a0 >> 2),
696 	0x00000000,
697 	(0x0e00 << 16) | (0x30900 >> 2),
698 	0x00000000,
699 	(0x0e00 << 16) | (0x30904 >> 2),
700 	0x00000000,
701 	(0x0e00 << 16) | (0x89b4 >> 2),
702 	0x00000000,
703 	(0x0e00 << 16) | (0x3c210 >> 2),
704 	0x00000000,
705 	(0x0e00 << 16) | (0x3c214 >> 2),
706 	0x00000000,
707 	(0x0e00 << 16) | (0x3c218 >> 2),
708 	0x00000000,
709 	(0x0e00 << 16) | (0x8904 >> 2),
710 	0x00000000,
711 	0x5,
712 	(0x0e00 << 16) | (0x8c28 >> 2),
713 	(0x0e00 << 16) | (0x8c2c >> 2),
714 	(0x0e00 << 16) | (0x8c30 >> 2),
715 	(0x0e00 << 16) | (0x8c34 >> 2),
716 	(0x0e00 << 16) | (0x9600 >> 2),
717 };
718 
719 static const u32 kalindi_rlc_save_restore_register_list[] =
720 {
721 	(0x0e00 << 16) | (0xc12c >> 2),
722 	0x00000000,
723 	(0x0e00 << 16) | (0xc140 >> 2),
724 	0x00000000,
725 	(0x0e00 << 16) | (0xc150 >> 2),
726 	0x00000000,
727 	(0x0e00 << 16) | (0xc15c >> 2),
728 	0x00000000,
729 	(0x0e00 << 16) | (0xc168 >> 2),
730 	0x00000000,
731 	(0x0e00 << 16) | (0xc170 >> 2),
732 	0x00000000,
733 	(0x0e00 << 16) | (0xc204 >> 2),
734 	0x00000000,
735 	(0x0e00 << 16) | (0xc2b4 >> 2),
736 	0x00000000,
737 	(0x0e00 << 16) | (0xc2b8 >> 2),
738 	0x00000000,
739 	(0x0e00 << 16) | (0xc2bc >> 2),
740 	0x00000000,
741 	(0x0e00 << 16) | (0xc2c0 >> 2),
742 	0x00000000,
743 	(0x0e00 << 16) | (0x8228 >> 2),
744 	0x00000000,
745 	(0x0e00 << 16) | (0x829c >> 2),
746 	0x00000000,
747 	(0x0e00 << 16) | (0x869c >> 2),
748 	0x00000000,
749 	(0x0600 << 16) | (0x98f4 >> 2),
750 	0x00000000,
751 	(0x0e00 << 16) | (0x98f8 >> 2),
752 	0x00000000,
753 	(0x0e00 << 16) | (0x9900 >> 2),
754 	0x00000000,
755 	(0x0e00 << 16) | (0xc260 >> 2),
756 	0x00000000,
757 	(0x0e00 << 16) | (0x90e8 >> 2),
758 	0x00000000,
759 	(0x0e00 << 16) | (0x3c000 >> 2),
760 	0x00000000,
761 	(0x0e00 << 16) | (0x3c00c >> 2),
762 	0x00000000,
763 	(0x0e00 << 16) | (0x8c1c >> 2),
764 	0x00000000,
765 	(0x0e00 << 16) | (0x9700 >> 2),
766 	0x00000000,
767 	(0x0e00 << 16) | (0xcd20 >> 2),
768 	0x00000000,
769 	(0x4e00 << 16) | (0xcd20 >> 2),
770 	0x00000000,
771 	(0x5e00 << 16) | (0xcd20 >> 2),
772 	0x00000000,
773 	(0x6e00 << 16) | (0xcd20 >> 2),
774 	0x00000000,
775 	(0x7e00 << 16) | (0xcd20 >> 2),
776 	0x00000000,
777 	(0x0e00 << 16) | (0x89bc >> 2),
778 	0x00000000,
779 	(0x0e00 << 16) | (0x8900 >> 2),
780 	0x00000000,
781 	0x3,
782 	(0x0e00 << 16) | (0xc130 >> 2),
783 	0x00000000,
784 	(0x0e00 << 16) | (0xc134 >> 2),
785 	0x00000000,
786 	(0x0e00 << 16) | (0xc1fc >> 2),
787 	0x00000000,
788 	(0x0e00 << 16) | (0xc208 >> 2),
789 	0x00000000,
790 	(0x0e00 << 16) | (0xc264 >> 2),
791 	0x00000000,
792 	(0x0e00 << 16) | (0xc268 >> 2),
793 	0x00000000,
794 	(0x0e00 << 16) | (0xc26c >> 2),
795 	0x00000000,
796 	(0x0e00 << 16) | (0xc270 >> 2),
797 	0x00000000,
798 	(0x0e00 << 16) | (0xc274 >> 2),
799 	0x00000000,
800 	(0x0e00 << 16) | (0xc28c >> 2),
801 	0x00000000,
802 	(0x0e00 << 16) | (0xc290 >> 2),
803 	0x00000000,
804 	(0x0e00 << 16) | (0xc294 >> 2),
805 	0x00000000,
806 	(0x0e00 << 16) | (0xc298 >> 2),
807 	0x00000000,
808 	(0x0e00 << 16) | (0xc2a0 >> 2),
809 	0x00000000,
810 	(0x0e00 << 16) | (0xc2a4 >> 2),
811 	0x00000000,
812 	(0x0e00 << 16) | (0xc2a8 >> 2),
813 	0x00000000,
814 	(0x0e00 << 16) | (0xc2ac >> 2),
815 	0x00000000,
816 	(0x0e00 << 16) | (0x301d0 >> 2),
817 	0x00000000,
818 	(0x0e00 << 16) | (0x30238 >> 2),
819 	0x00000000,
820 	(0x0e00 << 16) | (0x30250 >> 2),
821 	0x00000000,
822 	(0x0e00 << 16) | (0x30254 >> 2),
823 	0x00000000,
824 	(0x0e00 << 16) | (0x30258 >> 2),
825 	0x00000000,
826 	(0x0e00 << 16) | (0x3025c >> 2),
827 	0x00000000,
828 	(0x4e00 << 16) | (0xc900 >> 2),
829 	0x00000000,
830 	(0x5e00 << 16) | (0xc900 >> 2),
831 	0x00000000,
832 	(0x6e00 << 16) | (0xc900 >> 2),
833 	0x00000000,
834 	(0x7e00 << 16) | (0xc900 >> 2),
835 	0x00000000,
836 	(0x4e00 << 16) | (0xc904 >> 2),
837 	0x00000000,
838 	(0x5e00 << 16) | (0xc904 >> 2),
839 	0x00000000,
840 	(0x6e00 << 16) | (0xc904 >> 2),
841 	0x00000000,
842 	(0x7e00 << 16) | (0xc904 >> 2),
843 	0x00000000,
844 	(0x4e00 << 16) | (0xc908 >> 2),
845 	0x00000000,
846 	(0x5e00 << 16) | (0xc908 >> 2),
847 	0x00000000,
848 	(0x6e00 << 16) | (0xc908 >> 2),
849 	0x00000000,
850 	(0x7e00 << 16) | (0xc908 >> 2),
851 	0x00000000,
852 	(0x4e00 << 16) | (0xc90c >> 2),
853 	0x00000000,
854 	(0x5e00 << 16) | (0xc90c >> 2),
855 	0x00000000,
856 	(0x6e00 << 16) | (0xc90c >> 2),
857 	0x00000000,
858 	(0x7e00 << 16) | (0xc90c >> 2),
859 	0x00000000,
860 	(0x4e00 << 16) | (0xc910 >> 2),
861 	0x00000000,
862 	(0x5e00 << 16) | (0xc910 >> 2),
863 	0x00000000,
864 	(0x6e00 << 16) | (0xc910 >> 2),
865 	0x00000000,
866 	(0x7e00 << 16) | (0xc910 >> 2),
867 	0x00000000,
868 	(0x0e00 << 16) | (0xc99c >> 2),
869 	0x00000000,
870 	(0x0e00 << 16) | (0x9834 >> 2),
871 	0x00000000,
872 	(0x0000 << 16) | (0x30f00 >> 2),
873 	0x00000000,
874 	(0x0000 << 16) | (0x30f04 >> 2),
875 	0x00000000,
876 	(0x0000 << 16) | (0x30f08 >> 2),
877 	0x00000000,
878 	(0x0000 << 16) | (0x30f0c >> 2),
879 	0x00000000,
880 	(0x0600 << 16) | (0x9b7c >> 2),
881 	0x00000000,
882 	(0x0e00 << 16) | (0x8a14 >> 2),
883 	0x00000000,
884 	(0x0e00 << 16) | (0x8a18 >> 2),
885 	0x00000000,
886 	(0x0600 << 16) | (0x30a00 >> 2),
887 	0x00000000,
888 	(0x0e00 << 16) | (0x8bf0 >> 2),
889 	0x00000000,
890 	(0x0e00 << 16) | (0x8bcc >> 2),
891 	0x00000000,
892 	(0x0e00 << 16) | (0x8b24 >> 2),
893 	0x00000000,
894 	(0x0e00 << 16) | (0x30a04 >> 2),
895 	0x00000000,
896 	(0x0600 << 16) | (0x30a10 >> 2),
897 	0x00000000,
898 	(0x0600 << 16) | (0x30a14 >> 2),
899 	0x00000000,
900 	(0x0600 << 16) | (0x30a18 >> 2),
901 	0x00000000,
902 	(0x0600 << 16) | (0x30a2c >> 2),
903 	0x00000000,
904 	(0x0e00 << 16) | (0xc700 >> 2),
905 	0x00000000,
906 	(0x0e00 << 16) | (0xc704 >> 2),
907 	0x00000000,
908 	(0x0e00 << 16) | (0xc708 >> 2),
909 	0x00000000,
910 	(0x0e00 << 16) | (0xc768 >> 2),
911 	0x00000000,
912 	(0x0400 << 16) | (0xc770 >> 2),
913 	0x00000000,
914 	(0x0400 << 16) | (0xc774 >> 2),
915 	0x00000000,
916 	(0x0400 << 16) | (0xc798 >> 2),
917 	0x00000000,
918 	(0x0400 << 16) | (0xc79c >> 2),
919 	0x00000000,
920 	(0x0e00 << 16) | (0x9100 >> 2),
921 	0x00000000,
922 	(0x0e00 << 16) | (0x3c010 >> 2),
923 	0x00000000,
924 	(0x0e00 << 16) | (0x8c00 >> 2),
925 	0x00000000,
926 	(0x0e00 << 16) | (0x8c04 >> 2),
927 	0x00000000,
928 	(0x0e00 << 16) | (0x8c20 >> 2),
929 	0x00000000,
930 	(0x0e00 << 16) | (0x8c38 >> 2),
931 	0x00000000,
932 	(0x0e00 << 16) | (0x8c3c >> 2),
933 	0x00000000,
934 	(0x0e00 << 16) | (0xae00 >> 2),
935 	0x00000000,
936 	(0x0e00 << 16) | (0x9604 >> 2),
937 	0x00000000,
938 	(0x0e00 << 16) | (0xac08 >> 2),
939 	0x00000000,
940 	(0x0e00 << 16) | (0xac0c >> 2),
941 	0x00000000,
942 	(0x0e00 << 16) | (0xac10 >> 2),
943 	0x00000000,
944 	(0x0e00 << 16) | (0xac14 >> 2),
945 	0x00000000,
946 	(0x0e00 << 16) | (0xac58 >> 2),
947 	0x00000000,
948 	(0x0e00 << 16) | (0xac68 >> 2),
949 	0x00000000,
950 	(0x0e00 << 16) | (0xac6c >> 2),
951 	0x00000000,
952 	(0x0e00 << 16) | (0xac70 >> 2),
953 	0x00000000,
954 	(0x0e00 << 16) | (0xac74 >> 2),
955 	0x00000000,
956 	(0x0e00 << 16) | (0xac78 >> 2),
957 	0x00000000,
958 	(0x0e00 << 16) | (0xac7c >> 2),
959 	0x00000000,
960 	(0x0e00 << 16) | (0xac80 >> 2),
961 	0x00000000,
962 	(0x0e00 << 16) | (0xac84 >> 2),
963 	0x00000000,
964 	(0x0e00 << 16) | (0xac88 >> 2),
965 	0x00000000,
966 	(0x0e00 << 16) | (0xac8c >> 2),
967 	0x00000000,
968 	(0x0e00 << 16) | (0x970c >> 2),
969 	0x00000000,
970 	(0x0e00 << 16) | (0x9714 >> 2),
971 	0x00000000,
972 	(0x0e00 << 16) | (0x9718 >> 2),
973 	0x00000000,
974 	(0x0e00 << 16) | (0x971c >> 2),
975 	0x00000000,
976 	(0x0e00 << 16) | (0x31068 >> 2),
977 	0x00000000,
978 	(0x4e00 << 16) | (0x31068 >> 2),
979 	0x00000000,
980 	(0x5e00 << 16) | (0x31068 >> 2),
981 	0x00000000,
982 	(0x6e00 << 16) | (0x31068 >> 2),
983 	0x00000000,
984 	(0x7e00 << 16) | (0x31068 >> 2),
985 	0x00000000,
986 	(0x0e00 << 16) | (0xcd10 >> 2),
987 	0x00000000,
988 	(0x0e00 << 16) | (0xcd14 >> 2),
989 	0x00000000,
990 	(0x0e00 << 16) | (0x88b0 >> 2),
991 	0x00000000,
992 	(0x0e00 << 16) | (0x88b4 >> 2),
993 	0x00000000,
994 	(0x0e00 << 16) | (0x88b8 >> 2),
995 	0x00000000,
996 	(0x0e00 << 16) | (0x88bc >> 2),
997 	0x00000000,
998 	(0x0400 << 16) | (0x89c0 >> 2),
999 	0x00000000,
1000 	(0x0e00 << 16) | (0x88c4 >> 2),
1001 	0x00000000,
1002 	(0x0e00 << 16) | (0x88c8 >> 2),
1003 	0x00000000,
1004 	(0x0e00 << 16) | (0x88d0 >> 2),
1005 	0x00000000,
1006 	(0x0e00 << 16) | (0x88d4 >> 2),
1007 	0x00000000,
1008 	(0x0e00 << 16) | (0x88d8 >> 2),
1009 	0x00000000,
1010 	(0x0e00 << 16) | (0x8980 >> 2),
1011 	0x00000000,
1012 	(0x0e00 << 16) | (0x30938 >> 2),
1013 	0x00000000,
1014 	(0x0e00 << 16) | (0x3093c >> 2),
1015 	0x00000000,
1016 	(0x0e00 << 16) | (0x30940 >> 2),
1017 	0x00000000,
1018 	(0x0e00 << 16) | (0x89a0 >> 2),
1019 	0x00000000,
1020 	(0x0e00 << 16) | (0x30900 >> 2),
1021 	0x00000000,
1022 	(0x0e00 << 16) | (0x30904 >> 2),
1023 	0x00000000,
1024 	(0x0e00 << 16) | (0x89b4 >> 2),
1025 	0x00000000,
1026 	(0x0e00 << 16) | (0x3e1fc >> 2),
1027 	0x00000000,
1028 	(0x0e00 << 16) | (0x3c210 >> 2),
1029 	0x00000000,
1030 	(0x0e00 << 16) | (0x3c214 >> 2),
1031 	0x00000000,
1032 	(0x0e00 << 16) | (0x3c218 >> 2),
1033 	0x00000000,
1034 	(0x0e00 << 16) | (0x8904 >> 2),
1035 	0x00000000,
1036 	0x5,
1037 	(0x0e00 << 16) | (0x8c28 >> 2),
1038 	(0x0e00 << 16) | (0x8c2c >> 2),
1039 	(0x0e00 << 16) | (0x8c30 >> 2),
1040 	(0x0e00 << 16) | (0x8c34 >> 2),
1041 	(0x0e00 << 16) | (0x9600 >> 2),
1042 };
1043 
/* Golden (recommended power-on) SPM register override for Bonaire.
 * NOTE(review): entries appear to be {offset, and_mask, or_value}
 * triplets like the other *_golden_* tables below -- confirm against
 * the helper that programs these sequences. */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1048 
/* Golden common register overrides for Bonaire.
 * NOTE(review): entries appear to be {offset, and_mask, or_value}
 * triplets -- confirm against the helper that programs these
 * sequences. */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1056 
/* Golden register overrides for Bonaire.
 * NOTE(review): entries appear to be {offset, and_mask, or_value}
 * triplets -- the masked bits of each register are replaced with the
 * given value; confirm against the helper that programs these
 * sequences. */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1101 
/* Bonaire MGCG/CGCG (clock-gating) init sequence: { register offset, AND mask,
 * OR value } triples, applied by radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1187 
/* Spectre (Kaveri) SPM "golden" settings: { register offset, AND mask,
 * OR value } triples, applied by radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1192 
/* Spectre (Kaveri) common "golden" settings: { register offset, AND mask,
 * OR value } triples, applied by radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1200 
/* Spectre (Kaveri) "golden" register settings: { register offset, AND mask,
 * OR value } triples, applied by radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1229 
/* Spectre (Kaveri) MGCG/CGCG (clock-gating) init sequence: { register offset,
 * AND mask, OR value } triples, applied by radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1320 
/* Kalindi (Kabini/Mullins) SPM "golden" settings: { register offset, AND mask,
 * OR value } triples, applied by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1325 
/* Kalindi (Kabini/Mullins) common "golden" settings: { register offset,
 * AND mask, OR value } triples, applied by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1333 
/* Kalindi (Kabini) "golden" register settings: { register offset, AND mask,
 * OR value } triples, applied by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1367 
/* Kalindi (Kabini/Mullins) MGCG/CGCG (clock-gating) init sequence: { register
 * offset, AND mask, OR value } triples, applied by
 * radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1426 
/* Hawaii SPM "golden" settings: { register offset, AND mask, OR value }
 * triples, applied by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1431 
/* Hawaii common "golden" settings: { register offset, AND mask, OR value }
 * triples, applied by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1440 
/* Hawaii "golden" register settings: { register offset, AND mask, OR value }
 * triples, applied by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1480 
/* Hawaii MGCG/CGCG (clock-gating) init sequence: { register offset, AND mask,
 * OR value } triples, applied by radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1591 
/* Godavari (Mullins) "golden" register settings: { register offset, AND mask,
 * OR value } triples, applied by radeon_program_register_sequence().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 breaks the offset pattern of every other table
	 * (kalindi uses 0x9834 with the identical mask/value) — possible typo,
	 * verify against AMD register docs before changing.
	 */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1627 
1628 
1629 static void cik_init_golden_registers(struct radeon_device *rdev)
1630 {
1631 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1632 	mutex_lock(&rdev->grbm_idx_mutex);
1633 	switch (rdev->family) {
1634 	case CHIP_BONAIRE:
1635 		radeon_program_register_sequence(rdev,
1636 						 bonaire_mgcg_cgcg_init,
1637 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1638 		radeon_program_register_sequence(rdev,
1639 						 bonaire_golden_registers,
1640 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1641 		radeon_program_register_sequence(rdev,
1642 						 bonaire_golden_common_registers,
1643 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1644 		radeon_program_register_sequence(rdev,
1645 						 bonaire_golden_spm_registers,
1646 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1647 		break;
1648 	case CHIP_KABINI:
1649 		radeon_program_register_sequence(rdev,
1650 						 kalindi_mgcg_cgcg_init,
1651 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1652 		radeon_program_register_sequence(rdev,
1653 						 kalindi_golden_registers,
1654 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1655 		radeon_program_register_sequence(rdev,
1656 						 kalindi_golden_common_registers,
1657 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1658 		radeon_program_register_sequence(rdev,
1659 						 kalindi_golden_spm_registers,
1660 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1661 		break;
1662 	case CHIP_MULLINS:
1663 		radeon_program_register_sequence(rdev,
1664 						 kalindi_mgcg_cgcg_init,
1665 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1666 		radeon_program_register_sequence(rdev,
1667 						 godavari_golden_registers,
1668 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1669 		radeon_program_register_sequence(rdev,
1670 						 kalindi_golden_common_registers,
1671 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1672 		radeon_program_register_sequence(rdev,
1673 						 kalindi_golden_spm_registers,
1674 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1675 		break;
1676 	case CHIP_KAVERI:
1677 		radeon_program_register_sequence(rdev,
1678 						 spectre_mgcg_cgcg_init,
1679 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1680 		radeon_program_register_sequence(rdev,
1681 						 spectre_golden_registers,
1682 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1683 		radeon_program_register_sequence(rdev,
1684 						 spectre_golden_common_registers,
1685 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1686 		radeon_program_register_sequence(rdev,
1687 						 spectre_golden_spm_registers,
1688 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1689 		break;
1690 	case CHIP_HAWAII:
1691 		radeon_program_register_sequence(rdev,
1692 						 hawaii_mgcg_cgcg_init,
1693 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1694 		radeon_program_register_sequence(rdev,
1695 						 hawaii_golden_registers,
1696 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1697 		radeon_program_register_sequence(rdev,
1698 						 hawaii_golden_common_registers,
1699 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1700 		radeon_program_register_sequence(rdev,
1701 						 hawaii_golden_spm_registers,
1702 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1703 		break;
1704 	default:
1705 		break;
1706 	}
1707 	mutex_unlock(&rdev->grbm_idx_mutex);
1708 }
1709 
1710 /**
1711  * cik_get_xclk - get the xclk
1712  *
1713  * @rdev: radeon_device pointer
1714  *
1715  * Returns the reference clock used by the gfx engine
1716  * (CIK).
1717  */
1718 u32 cik_get_xclk(struct radeon_device *rdev)
1719 {
1720 	u32 reference_clock = rdev->clock.spll.reference_freq;
1721 
1722 	if (rdev->flags & RADEON_IS_IGP) {
1723 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1724 			return reference_clock / 2;
1725 	} else {
1726 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1727 			return reference_clock / 4;
1728 	}
1729 	return reference_clock;
1730 }
1731 
1732 /**
1733  * cik_mm_rdoorbell - read a doorbell dword
1734  *
1735  * @rdev: radeon_device pointer
1736  * @index: doorbell index
1737  *
1738  * Returns the value in the doorbell aperture at the
1739  * requested doorbell index (CIK).
1740  */
1741 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1742 {
1743 	if (index < rdev->doorbell.num_doorbells) {
1744 		return readl(rdev->doorbell.ptr + index);
1745 	} else {
1746 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1747 		return 0;
1748 	}
1749 }
1750 
1751 /**
1752  * cik_mm_wdoorbell - write a doorbell dword
1753  *
1754  * @rdev: radeon_device pointer
1755  * @index: doorbell index
1756  * @v: value to write
1757  *
1758  * Writes @v to the doorbell aperture at the
1759  * requested doorbell index (CIK).
1760  */
1761 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1762 {
1763 	if (index < rdev->doorbell.num_doorbells) {
1764 		writel(v, rdev->doorbell.ptr + index);
1765 	} else {
1766 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1767 	}
1768 }
1769 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire legacy-firmware MC io-debug init: { MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA } pairs written by ci_mc_load_microcode() before
 * loading the MC ucode.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1811 
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii legacy-firmware MC io-debug init: { MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA } pairs written by ci_mc_load_microcode() before
 * loading the MC ucode.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1839 
1840 
1841 /**
1842  * cik_srbm_select - select specific register instances
1843  *
1844  * @rdev: radeon_device pointer
1845  * @me: selected ME (micro engine)
1846  * @pipe: pipe
1847  * @queue: queue
1848  * @vmid: VMID
1849  *
1850  * Switches the currently active registers instances.  Some
1851  * registers are instanced per VMID, others are instanced per
1852  * me/pipe/queue combination.
1853  */
1854 static void cik_srbm_select(struct radeon_device *rdev,
1855 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1856 {
1857 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1858 			     MEID(me & 0x3) |
1859 			     VMID(vmid & 0xf) |
1860 			     QUEUEID(queue & 0x7));
1861 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1862 }
1863 
1864 /* ucode loading */
1865 /**
1866  * ci_mc_load_microcode - load MC ucode into the hw
1867  *
1868  * @rdev: radeon_device pointer
1869  *
1870  * Load the GDDR MC ucode into the hw (CIK).
1871  * Returns 0 on success, error on failure.
1872  */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	/* legacy images are big-endian words; new-style images are LE */
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new-style firmware: sizes and offsets come from the header */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io_debug entries are {index, data} pairs of 32-bit words,
		 * hence bytes / (4 * 2) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy firmware: whole blob is ucode, io regs come from the
		 * per-ASIC tables above */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load if the MC ucode engine is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* NOTE(review): board-specific extra io-debug writes for PCI
		 * device 0x6649 when MC_SEQ_MISC0 reports 0x56xx — intent not
		 * derivable from this file; confirm against AMD errata. */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete on both channels; poll up to
		 * usec_timeout microseconds each — timeouts are not treated
		 * as errors */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1969 
1970 /**
1971  * cik_init_microcode - load ucode images from disk
1972  *
1973  * @rdev: radeon_device pointer
1974  *
1975  * Use the firmware interface to load the ucode images into
1976  * the driver (not loaded into hw).
1977  * Returns 0 on success, error on failure.
1978  */
1979 static int cik_init_microcode(struct radeon_device *rdev)
1980 {
1981 	const char *chip_name;
1982 	const char *new_chip_name;
1983 	size_t pfp_req_size, me_req_size, ce_req_size,
1984 		mec_req_size, rlc_req_size, mc_req_size = 0,
1985 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1986 	char fw_name[30];
1987 	int new_fw = 0;
1988 	int err;
1989 	int num_fw;
1990 	bool new_smc = false;
1991 
1992 	DRM_DEBUG("\n");
1993 
1994 	switch (rdev->family) {
1995 	case CHIP_BONAIRE:
1996 		chip_name = "BONAIRE";
1997 		if ((rdev->pdev->revision == 0x80) ||
1998 		    (rdev->pdev->revision == 0x81) ||
1999 		    (rdev->pdev->device == 0x665f))
2000 			new_smc = true;
2001 		new_chip_name = "bonaire";
2002 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2003 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2004 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2005 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2006 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2007 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2008 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2009 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2010 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2011 		num_fw = 8;
2012 		break;
2013 	case CHIP_HAWAII:
2014 		chip_name = "HAWAII";
2015 		if (rdev->pdev->revision == 0x80)
2016 			new_smc = true;
2017 		new_chip_name = "hawaii";
2018 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2019 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2020 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2021 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2022 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2023 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2024 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2025 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2026 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2027 		num_fw = 8;
2028 		break;
2029 	case CHIP_KAVERI:
2030 		chip_name = "KAVERI";
2031 		new_chip_name = "kaveri";
2032 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2033 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2034 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2035 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2036 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2037 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2038 		num_fw = 7;
2039 		break;
2040 	case CHIP_KABINI:
2041 		chip_name = "KABINI";
2042 		new_chip_name = "kabini";
2043 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2044 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2045 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2046 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2047 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2048 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2049 		num_fw = 6;
2050 		break;
2051 	case CHIP_MULLINS:
2052 		chip_name = "MULLINS";
2053 		new_chip_name = "mullins";
2054 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2055 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2056 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2057 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2058 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2059 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2060 		num_fw = 6;
2061 		break;
2062 	default: BUG();
2063 	}
2064 
2065 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2066 
2067 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2068 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069 	if (err) {
2070 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2071 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2072 		if (err)
2073 			goto out;
2074 		if (rdev->pfp_fw->size != pfp_req_size) {
2075 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2076 			       rdev->pfp_fw->size, fw_name);
2077 			err = -EINVAL;
2078 			goto out;
2079 		}
2080 	} else {
2081 		err = radeon_ucode_validate(rdev->pfp_fw);
2082 		if (err) {
2083 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2084 			       fw_name);
2085 			goto out;
2086 		} else {
2087 			new_fw++;
2088 		}
2089 	}
2090 
2091 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2092 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2093 	if (err) {
2094 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2095 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2096 		if (err)
2097 			goto out;
2098 		if (rdev->me_fw->size != me_req_size) {
2099 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100 			       rdev->me_fw->size, fw_name);
2101 			err = -EINVAL;
2102 		}
2103 	} else {
2104 		err = radeon_ucode_validate(rdev->me_fw);
2105 		if (err) {
2106 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->ce_fw->size != ce_req_size) {
2122 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2123 			       rdev->ce_fw->size, fw_name);
2124 			err = -EINVAL;
2125 		}
2126 	} else {
2127 		err = radeon_ucode_validate(rdev->ce_fw);
2128 		if (err) {
2129 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2130 			       fw_name);
2131 			goto out;
2132 		} else {
2133 			new_fw++;
2134 		}
2135 	}
2136 
2137 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2138 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2139 	if (err) {
2140 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2141 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142 		if (err)
2143 			goto out;
2144 		if (rdev->mec_fw->size != mec_req_size) {
2145 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2146 			       rdev->mec_fw->size, fw_name);
2147 			err = -EINVAL;
2148 		}
2149 	} else {
2150 		err = radeon_ucode_validate(rdev->mec_fw);
2151 		if (err) {
2152 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2153 			       fw_name);
2154 			goto out;
2155 		} else {
2156 			new_fw++;
2157 		}
2158 	}
2159 
2160 	if (rdev->family == CHIP_KAVERI) {
2161 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2162 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2163 		if (err) {
2164 			goto out;
2165 		} else {
2166 			err = radeon_ucode_validate(rdev->mec2_fw);
2167 			if (err) {
2168 				goto out;
2169 			} else {
2170 				new_fw++;
2171 			}
2172 		}
2173 	}
2174 
2175 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2176 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2177 	if (err) {
2178 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2179 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2180 		if (err)
2181 			goto out;
2182 		if (rdev->rlc_fw->size != rlc_req_size) {
2183 			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2184 			       rdev->rlc_fw->size, fw_name);
2185 			err = -EINVAL;
2186 		}
2187 	} else {
2188 		err = radeon_ucode_validate(rdev->rlc_fw);
2189 		if (err) {
2190 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2191 			       fw_name);
2192 			goto out;
2193 		} else {
2194 			new_fw++;
2195 		}
2196 	}
2197 
2198 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2199 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2200 	if (err) {
2201 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2202 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2203 		if (err)
2204 			goto out;
2205 		if (rdev->sdma_fw->size != sdma_req_size) {
2206 			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2207 			       rdev->sdma_fw->size, fw_name);
2208 			err = -EINVAL;
2209 		}
2210 	} else {
2211 		err = radeon_ucode_validate(rdev->sdma_fw);
2212 		if (err) {
2213 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2214 			       fw_name);
2215 			goto out;
2216 		} else {
2217 			new_fw++;
2218 		}
2219 	}
2220 
2221 	/* No SMC, MC ucode on APUs */
2222 	if (!(rdev->flags & RADEON_IS_IGP)) {
2223 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2224 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2225 		if (err) {
2226 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2227 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2228 			if (err) {
2229 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2230 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2231 				if (err)
2232 					goto out;
2233 			}
2234 			if ((rdev->mc_fw->size != mc_req_size) &&
2235 			    (rdev->mc_fw->size != mc2_req_size)){
2236 				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2237 				       rdev->mc_fw->size, fw_name);
2238 				err = -EINVAL;
2239 			}
2240 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2241 		} else {
2242 			err = radeon_ucode_validate(rdev->mc_fw);
2243 			if (err) {
2244 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2245 				       fw_name);
2246 				goto out;
2247 			} else {
2248 				new_fw++;
2249 			}
2250 		}
2251 
2252 		if (new_smc)
2253 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2254 		else
2255 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2256 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2257 		if (err) {
2258 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2259 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2260 			if (err) {
2261 				pr_err("smc: error loading firmware \"%s\"\n",
2262 				       fw_name);
2263 				release_firmware(rdev->smc_fw);
2264 				rdev->smc_fw = NULL;
2265 				err = 0;
2266 			} else if (rdev->smc_fw->size != smc_req_size) {
2267 				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2268 				       rdev->smc_fw->size, fw_name);
2269 				err = -EINVAL;
2270 			}
2271 		} else {
2272 			err = radeon_ucode_validate(rdev->smc_fw);
2273 			if (err) {
2274 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2275 				       fw_name);
2276 				goto out;
2277 			} else {
2278 				new_fw++;
2279 			}
2280 		}
2281 	}
2282 
2283 	if (new_fw == 0) {
2284 		rdev->new_fw = false;
2285 	} else if (new_fw < num_fw) {
2286 		pr_err("ci_fw: mixing new and old firmware!\n");
2287 		err = -EINVAL;
2288 	} else {
2289 		rdev->new_fw = true;
2290 	}
2291 
2292 out:
2293 	if (err) {
2294 		if (err != -EINVAL)
2295 			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2296 			       fw_name);
2297 		release_firmware(rdev->pfp_fw);
2298 		rdev->pfp_fw = NULL;
2299 		release_firmware(rdev->me_fw);
2300 		rdev->me_fw = NULL;
2301 		release_firmware(rdev->ce_fw);
2302 		rdev->ce_fw = NULL;
2303 		release_firmware(rdev->mec_fw);
2304 		rdev->mec_fw = NULL;
2305 		release_firmware(rdev->mec2_fw);
2306 		rdev->mec2_fw = NULL;
2307 		release_firmware(rdev->rlc_fw);
2308 		rdev->rlc_fw = NULL;
2309 		release_firmware(rdev->sdma_fw);
2310 		rdev->sdma_fw = NULL;
2311 		release_firmware(rdev->mc_fw);
2312 		rdev->mc_fw = NULL;
2313 		release_firmware(rdev->smc_fw);
2314 		rdev->smc_fw = NULL;
2315 	}
2316 	return err;
2317 }
2318 
2319 /*
2320  * Core functions
2321  */
2322 /**
2323  * cik_tiling_mode_table_init - init the hw tiling table
2324  *
2325  * @rdev: radeon_device pointer
2326  *
2327  * Starting with SI, the tiling setup is done globally in a
2328  * set of 32 tiling modes.  Rather than selecting each set of
2329  * parameters per surface as on older asics, we just select
2330  * which index in the tiling table we want to use, and the
2331  * surface uses those parameters (CIK).
2332  */
2333 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2334 {
2335 	u32 *tile = rdev->config.cik.tile_mode_array;
2336 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2337 	const u32 num_tile_mode_states =
2338 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2339 	const u32 num_secondary_tile_mode_states =
2340 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2341 	u32 reg_offset, split_equal_to_row_size;
2342 	u32 num_pipe_configs;
2343 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2344 		rdev->config.cik.max_shader_engines;
2345 
2346 	switch (rdev->config.cik.mem_row_size_in_kb) {
2347 	case 1:
2348 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2349 		break;
2350 	case 2:
2351 	default:
2352 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2353 		break;
2354 	case 4:
2355 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2356 		break;
2357 	}
2358 
2359 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2360 	if (num_pipe_configs > 8)
2361 		num_pipe_configs = 16;
2362 
2363 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2364 		tile[reg_offset] = 0;
2365 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2366 		macrotile[reg_offset] = 0;
2367 
2368 	switch(num_pipe_configs) {
2369 	case 16:
2370 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2374 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2378 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2382 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2386 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 			   TILE_SPLIT(split_equal_to_row_size));
2390 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2398 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 			   TILE_SPLIT(split_equal_to_row_size));
2401 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2402 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2403 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2406 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2421 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2436 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448 
2449 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 			   NUM_BANKS(ADDR_SURF_16_BANK));
2453 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456 			   NUM_BANKS(ADDR_SURF_16_BANK));
2457 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460 			   NUM_BANKS(ADDR_SURF_16_BANK));
2461 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 			   NUM_BANKS(ADDR_SURF_16_BANK));
2465 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 			   NUM_BANKS(ADDR_SURF_8_BANK));
2469 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472 			   NUM_BANKS(ADDR_SURF_4_BANK));
2473 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476 			   NUM_BANKS(ADDR_SURF_2_BANK));
2477 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480 			   NUM_BANKS(ADDR_SURF_16_BANK));
2481 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484 			   NUM_BANKS(ADDR_SURF_16_BANK));
2485 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 			    NUM_BANKS(ADDR_SURF_16_BANK));
2489 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492 			    NUM_BANKS(ADDR_SURF_8_BANK));
2493 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496 			    NUM_BANKS(ADDR_SURF_4_BANK));
2497 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 			    NUM_BANKS(ADDR_SURF_2_BANK));
2501 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504 			    NUM_BANKS(ADDR_SURF_2_BANK));
2505 
2506 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2508 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2509 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2510 		break;
2511 
2512 	case 8:
2513 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2517 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2521 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2525 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2529 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532 			   TILE_SPLIT(split_equal_to_row_size));
2533 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2541 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 			   TILE_SPLIT(split_equal_to_row_size));
2544 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2549 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2558 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2564 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2579 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591 
2592 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2594 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2595 				NUM_BANKS(ADDR_SURF_16_BANK));
2596 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599 				NUM_BANKS(ADDR_SURF_16_BANK));
2600 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603 				NUM_BANKS(ADDR_SURF_16_BANK));
2604 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2607 				NUM_BANKS(ADDR_SURF_16_BANK));
2608 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611 				NUM_BANKS(ADDR_SURF_8_BANK));
2612 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615 				NUM_BANKS(ADDR_SURF_4_BANK));
2616 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619 				NUM_BANKS(ADDR_SURF_2_BANK));
2620 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2622 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623 				NUM_BANKS(ADDR_SURF_16_BANK));
2624 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2627 				NUM_BANKS(ADDR_SURF_16_BANK));
2628 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631 				NUM_BANKS(ADDR_SURF_16_BANK));
2632 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2635 				NUM_BANKS(ADDR_SURF_16_BANK));
2636 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639 				NUM_BANKS(ADDR_SURF_8_BANK));
2640 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643 				NUM_BANKS(ADDR_SURF_4_BANK));
2644 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2647 				NUM_BANKS(ADDR_SURF_2_BANK));
2648 
2649 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2650 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2651 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2652 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2653 		break;
2654 
2655 	case 4:
2656 		if (num_rbs == 4) {
2657 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2661 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2665 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2673 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2675 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676 			   TILE_SPLIT(split_equal_to_row_size));
2677 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2685 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 			   TILE_SPLIT(split_equal_to_row_size));
2688 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2693 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2703 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2708 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2723 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2724 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735 
2736 		} else if (num_rbs < 4) {
2737 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2741 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2745 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2749 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2753 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2755 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756 			   TILE_SPLIT(split_equal_to_row_size));
2757 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2760 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2765 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 			   TILE_SPLIT(split_equal_to_row_size));
2768 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2769 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2770 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2773 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2782 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2788 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2803 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 		}
2816 
2817 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820 				NUM_BANKS(ADDR_SURF_16_BANK));
2821 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824 				NUM_BANKS(ADDR_SURF_16_BANK));
2825 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828 				NUM_BANKS(ADDR_SURF_16_BANK));
2829 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832 				NUM_BANKS(ADDR_SURF_16_BANK));
2833 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836 				NUM_BANKS(ADDR_SURF_16_BANK));
2837 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840 				NUM_BANKS(ADDR_SURF_8_BANK));
2841 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844 				NUM_BANKS(ADDR_SURF_4_BANK));
2845 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2847 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848 				NUM_BANKS(ADDR_SURF_16_BANK));
2849 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2850 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852 				NUM_BANKS(ADDR_SURF_16_BANK));
2853 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856 				NUM_BANKS(ADDR_SURF_16_BANK));
2857 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860 				NUM_BANKS(ADDR_SURF_16_BANK));
2861 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864 				NUM_BANKS(ADDR_SURF_16_BANK));
2865 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2868 				NUM_BANKS(ADDR_SURF_8_BANK));
2869 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2872 				NUM_BANKS(ADDR_SURF_4_BANK));
2873 
2874 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2875 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2876 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2877 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2878 		break;
2879 
2880 	case 2:
2881 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883 			   PIPE_CONFIG(ADDR_SURF_P2) |
2884 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2885 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887 			   PIPE_CONFIG(ADDR_SURF_P2) |
2888 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891 			   PIPE_CONFIG(ADDR_SURF_P2) |
2892 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895 			   PIPE_CONFIG(ADDR_SURF_P2) |
2896 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2897 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899 			   PIPE_CONFIG(ADDR_SURF_P2) |
2900 			   TILE_SPLIT(split_equal_to_row_size));
2901 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902 			   PIPE_CONFIG(ADDR_SURF_P2) |
2903 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 			   PIPE_CONFIG(ADDR_SURF_P2) |
2907 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2909 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910 			   PIPE_CONFIG(ADDR_SURF_P2) |
2911 			   TILE_SPLIT(split_equal_to_row_size));
2912 		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2913 			   PIPE_CONFIG(ADDR_SURF_P2);
2914 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916 			   PIPE_CONFIG(ADDR_SURF_P2));
2917 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919 			    PIPE_CONFIG(ADDR_SURF_P2) |
2920 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923 			    PIPE_CONFIG(ADDR_SURF_P2) |
2924 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927 			    PIPE_CONFIG(ADDR_SURF_P2) |
2928 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930 			    PIPE_CONFIG(ADDR_SURF_P2) |
2931 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2932 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934 			    PIPE_CONFIG(ADDR_SURF_P2) |
2935 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938 			    PIPE_CONFIG(ADDR_SURF_P2) |
2939 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942 			    PIPE_CONFIG(ADDR_SURF_P2) |
2943 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946 			    PIPE_CONFIG(ADDR_SURF_P2));
2947 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949 			    PIPE_CONFIG(ADDR_SURF_P2) |
2950 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953 			    PIPE_CONFIG(ADDR_SURF_P2) |
2954 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957 			    PIPE_CONFIG(ADDR_SURF_P2) |
2958 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 
2960 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963 				NUM_BANKS(ADDR_SURF_16_BANK));
2964 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2965 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967 				NUM_BANKS(ADDR_SURF_16_BANK));
2968 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 				NUM_BANKS(ADDR_SURF_16_BANK));
2972 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975 				NUM_BANKS(ADDR_SURF_16_BANK));
2976 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979 				NUM_BANKS(ADDR_SURF_16_BANK));
2980 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983 				NUM_BANKS(ADDR_SURF_16_BANK));
2984 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987 				NUM_BANKS(ADDR_SURF_8_BANK));
2988 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2990 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 				NUM_BANKS(ADDR_SURF_16_BANK));
2992 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2993 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 				NUM_BANKS(ADDR_SURF_16_BANK));
2996 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999 				NUM_BANKS(ADDR_SURF_16_BANK));
3000 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 				NUM_BANKS(ADDR_SURF_16_BANK));
3004 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3006 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007 				NUM_BANKS(ADDR_SURF_16_BANK));
3008 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011 				NUM_BANKS(ADDR_SURF_16_BANK));
3012 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3015 				NUM_BANKS(ADDR_SURF_8_BANK));
3016 
3017 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3018 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3019 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3020 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3021 		break;
3022 
3023 	default:
3024 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3025 	}
3026 }
3027 
3028 /**
3029  * cik_select_se_sh - select which SE, SH to address
3030  *
3031  * @rdev: radeon_device pointer
3032  * @se_num: shader engine to address
3033  * @sh_num: sh block to address
3034  *
3035  * Select which SE, SH combinations to address. Certain
3036  * registers are instanced per SE or SH.  0xffffffff means
3037  * broadcast to all SEs or SHs (CIK).
3038  */
3039 static void cik_select_se_sh(struct radeon_device *rdev,
3040 			     u32 se_num, u32 sh_num)
3041 {
3042 	u32 data = INSTANCE_BROADCAST_WRITES;
3043 
3044 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3045 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3046 	else if (se_num == 0xffffffff)
3047 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3048 	else if (sh_num == 0xffffffff)
3049 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3050 	else
3051 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3052 	WREG32(GRBM_GFX_INDEX, data);
3053 }
3054 
3055 /**
3056  * cik_create_bitmask - create a bitmask
3057  *
3058  * @bit_width: length of the mask
3059  *
3060  * create a variable length bit mask (CIK).
3061  * Returns the bitmask.
3062  */
3063 static u32 cik_create_bitmask(u32 bit_width)
3064 {
3065 	u32 i, mask = 0;
3066 
3067 	for (i = 0; i < bit_width; i++) {
3068 		mask <<= 1;
3069 		mask |= 1;
3070 	}
3071 	return mask;
3072 }
3073 
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
3085 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3086 			      u32 max_rb_num_per_se,
3087 			      u32 sh_per_se)
3088 {
3089 	u32 data, mask;
3090 
3091 	data = RREG32(CC_RB_BACKEND_DISABLE);
3092 	if (data & 1)
3093 		data &= BACKEND_DISABLE_MASK;
3094 	else
3095 		data = 0;
3096 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3097 
3098 	data >>= BACKEND_DISABLE_SHIFT;
3099 
3100 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3101 
3102 	return data & mask;
3103 }
3104 
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* First pass: walk every SE/SH combination and pack each one's
	 * disabled-RB bits into a single bitmask.  The bitmap width per
	 * SH differs on Hawaii.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast addressing before releasing the lock */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* invert: every RB that is not disabled is enabled */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Second pass: program PA_SC_RASTER_CONFIG per SE from the
	 * enabled-RB mask, consuming two bits of enabled_rbs per SH.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RBs enabled in this SH */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* only the first RB enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* back to broadcast addressing */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3177 
3178 /**
3179  * cik_gpu_init - setup the 3D engine
3180  *
3181  * @rdev: radeon_device pointer
3182  *
3183  * Configures the 3D engine and tiling configuration
3184  * registers so that the 3D engine is usable.
3185  */
3186 static void cik_gpu_init(struct radeon_device *rdev)
3187 {
3188 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3189 	u32 mc_shared_chmap, mc_arb_ramcfg;
3190 	u32 hdp_host_path_cntl;
3191 	u32 tmp;
3192 	int i, j;
3193 
3194 	switch (rdev->family) {
3195 	case CHIP_BONAIRE:
3196 		rdev->config.cik.max_shader_engines = 2;
3197 		rdev->config.cik.max_tile_pipes = 4;
3198 		rdev->config.cik.max_cu_per_sh = 7;
3199 		rdev->config.cik.max_sh_per_se = 1;
3200 		rdev->config.cik.max_backends_per_se = 2;
3201 		rdev->config.cik.max_texture_channel_caches = 4;
3202 		rdev->config.cik.max_gprs = 256;
3203 		rdev->config.cik.max_gs_threads = 32;
3204 		rdev->config.cik.max_hw_contexts = 8;
3205 
3206 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3207 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3208 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3209 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3210 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3211 		break;
3212 	case CHIP_HAWAII:
3213 		rdev->config.cik.max_shader_engines = 4;
3214 		rdev->config.cik.max_tile_pipes = 16;
3215 		rdev->config.cik.max_cu_per_sh = 11;
3216 		rdev->config.cik.max_sh_per_se = 1;
3217 		rdev->config.cik.max_backends_per_se = 4;
3218 		rdev->config.cik.max_texture_channel_caches = 16;
3219 		rdev->config.cik.max_gprs = 256;
3220 		rdev->config.cik.max_gs_threads = 32;
3221 		rdev->config.cik.max_hw_contexts = 8;
3222 
3223 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3224 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3225 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3226 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3227 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3228 		break;
3229 	case CHIP_KAVERI:
3230 		rdev->config.cik.max_shader_engines = 1;
3231 		rdev->config.cik.max_tile_pipes = 4;
3232 		if ((rdev->pdev->device == 0x1304) ||
3233 		    (rdev->pdev->device == 0x1305) ||
3234 		    (rdev->pdev->device == 0x130C) ||
3235 		    (rdev->pdev->device == 0x130F) ||
3236 		    (rdev->pdev->device == 0x1310) ||
3237 		    (rdev->pdev->device == 0x1311) ||
3238 		    (rdev->pdev->device == 0x131C)) {
3239 			rdev->config.cik.max_cu_per_sh = 8;
3240 			rdev->config.cik.max_backends_per_se = 2;
3241 		} else if ((rdev->pdev->device == 0x1309) ||
3242 			   (rdev->pdev->device == 0x130A) ||
3243 			   (rdev->pdev->device == 0x130D) ||
3244 			   (rdev->pdev->device == 0x1313) ||
3245 			   (rdev->pdev->device == 0x131D)) {
3246 			rdev->config.cik.max_cu_per_sh = 6;
3247 			rdev->config.cik.max_backends_per_se = 2;
3248 		} else if ((rdev->pdev->device == 0x1306) ||
3249 			   (rdev->pdev->device == 0x1307) ||
3250 			   (rdev->pdev->device == 0x130B) ||
3251 			   (rdev->pdev->device == 0x130E) ||
3252 			   (rdev->pdev->device == 0x1315) ||
3253 			   (rdev->pdev->device == 0x1318) ||
3254 			   (rdev->pdev->device == 0x131B)) {
3255 			rdev->config.cik.max_cu_per_sh = 4;
3256 			rdev->config.cik.max_backends_per_se = 1;
3257 		} else {
3258 			rdev->config.cik.max_cu_per_sh = 3;
3259 			rdev->config.cik.max_backends_per_se = 1;
3260 		}
3261 		rdev->config.cik.max_sh_per_se = 1;
3262 		rdev->config.cik.max_texture_channel_caches = 4;
3263 		rdev->config.cik.max_gprs = 256;
3264 		rdev->config.cik.max_gs_threads = 16;
3265 		rdev->config.cik.max_hw_contexts = 8;
3266 
3267 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3268 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3269 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3270 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3271 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3272 		break;
3273 	case CHIP_KABINI:
3274 	case CHIP_MULLINS:
3275 	default:
3276 		rdev->config.cik.max_shader_engines = 1;
3277 		rdev->config.cik.max_tile_pipes = 2;
3278 		rdev->config.cik.max_cu_per_sh = 2;
3279 		rdev->config.cik.max_sh_per_se = 1;
3280 		rdev->config.cik.max_backends_per_se = 1;
3281 		rdev->config.cik.max_texture_channel_caches = 2;
3282 		rdev->config.cik.max_gprs = 256;
3283 		rdev->config.cik.max_gs_threads = 16;
3284 		rdev->config.cik.max_hw_contexts = 8;
3285 
3286 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3287 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3288 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3289 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3290 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3291 		break;
3292 	}
3293 
3294 	/* Initialize HDP */
3295 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3296 		WREG32((0x2c14 + j), 0x00000000);
3297 		WREG32((0x2c18 + j), 0x00000000);
3298 		WREG32((0x2c1c + j), 0x00000000);
3299 		WREG32((0x2c20 + j), 0x00000000);
3300 		WREG32((0x2c24 + j), 0x00000000);
3301 	}
3302 
3303 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3304 	WREG32(SRBM_INT_CNTL, 0x1);
3305 	WREG32(SRBM_INT_ACK, 0x1);
3306 
3307 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3308 
3309 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3310 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3311 
3312 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3313 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3314 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3315 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3316 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3317 		rdev->config.cik.mem_row_size_in_kb = 4;
3318 	/* XXX use MC settings? */
3319 	rdev->config.cik.shader_engine_tile_size = 32;
3320 	rdev->config.cik.num_gpus = 1;
3321 	rdev->config.cik.multi_gpu_tile_size = 64;
3322 
3323 	/* fix up row size */
3324 	gb_addr_config &= ~ROW_SIZE_MASK;
3325 	switch (rdev->config.cik.mem_row_size_in_kb) {
3326 	case 1:
3327 	default:
3328 		gb_addr_config |= ROW_SIZE(0);
3329 		break;
3330 	case 2:
3331 		gb_addr_config |= ROW_SIZE(1);
3332 		break;
3333 	case 4:
3334 		gb_addr_config |= ROW_SIZE(2);
3335 		break;
3336 	}
3337 
3338 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3339 	 * not have bank info, so create a custom tiling dword.
3340 	 * bits 3:0   num_pipes
3341 	 * bits 7:4   num_banks
3342 	 * bits 11:8  group_size
3343 	 * bits 15:12 row_size
3344 	 */
3345 	rdev->config.cik.tile_config = 0;
3346 	switch (rdev->config.cik.num_tile_pipes) {
3347 	case 1:
3348 		rdev->config.cik.tile_config |= (0 << 0);
3349 		break;
3350 	case 2:
3351 		rdev->config.cik.tile_config |= (1 << 0);
3352 		break;
3353 	case 4:
3354 		rdev->config.cik.tile_config |= (2 << 0);
3355 		break;
3356 	case 8:
3357 	default:
3358 		/* XXX what about 12? */
3359 		rdev->config.cik.tile_config |= (3 << 0);
3360 		break;
3361 	}
3362 	rdev->config.cik.tile_config |=
3363 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3364 	rdev->config.cik.tile_config |=
3365 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3366 	rdev->config.cik.tile_config |=
3367 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3368 
3369 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3370 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3371 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3372 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3373 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3374 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3375 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3376 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3377 
3378 	cik_tiling_mode_table_init(rdev);
3379 
3380 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3381 		     rdev->config.cik.max_sh_per_se,
3382 		     rdev->config.cik.max_backends_per_se);
3383 
3384 	rdev->config.cik.active_cus = 0;
3385 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3386 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3387 			rdev->config.cik.active_cus +=
3388 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3389 		}
3390 	}
3391 
3392 	/* set HW defaults for 3D engine */
3393 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3394 
3395 	mutex_lock(&rdev->grbm_idx_mutex);
3396 	/*
3397 	 * making sure that the following register writes will be broadcasted
3398 	 * to all the shaders
3399 	 */
3400 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3401 	WREG32(SX_DEBUG_1, 0x20);
3402 
3403 	WREG32(TA_CNTL_AUX, 0x00010000);
3404 
3405 	tmp = RREG32(SPI_CONFIG_CNTL);
3406 	tmp |= 0x03000000;
3407 	WREG32(SPI_CONFIG_CNTL, tmp);
3408 
3409 	WREG32(SQ_CONFIG, 1);
3410 
3411 	WREG32(DB_DEBUG, 0);
3412 
3413 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3414 	tmp |= 0x00000400;
3415 	WREG32(DB_DEBUG2, tmp);
3416 
3417 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3418 	tmp |= 0x00020200;
3419 	WREG32(DB_DEBUG3, tmp);
3420 
3421 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3422 	tmp |= 0x00018208;
3423 	WREG32(CB_HW_CONTROL, tmp);
3424 
3425 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3426 
3427 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3428 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3429 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3430 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3431 
3432 	WREG32(VGT_NUM_INSTANCES, 1);
3433 
3434 	WREG32(CP_PERFMON_CNTL, 0);
3435 
3436 	WREG32(SQ_CONFIG, 0);
3437 
3438 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3439 					  FORCE_EOV_MAX_REZ_CNT(255)));
3440 
3441 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3442 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3443 
3444 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3445 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3446 
3447 	tmp = RREG32(HDP_MISC_CNTL);
3448 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3449 	WREG32(HDP_MISC_CNTL, tmp);
3450 
3451 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3452 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3453 
3454 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3455 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3456 	mutex_unlock(&rdev->grbm_idx_mutex);
3457 
3458 	udelay(50);
3459 }
3460 
3461 /*
3462  * GPU scratch registers helpers function.
3463  */
3464 /**
3465  * cik_scratch_init - setup driver info for CP scratch regs
3466  *
3467  * @rdev: radeon_device pointer
3468  *
3469  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3471  * is not used by default on newer asics (r6xx+).  On newer asics,
3472  * memory buffers are used for fences rather than scratch regs.
3473  */
3474 static void cik_scratch_init(struct radeon_device *rdev)
3475 {
3476 	int i;
3477 
3478 	rdev->scratch.num_reg = 7;
3479 	rdev->scratch.reg_base = SCRATCH_REG0;
3480 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3481 		rdev->scratch.free[i] = true;
3482 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3483 	}
3484 }
3485 
3486 /**
3487  * cik_ring_test - basic gfx ring test
3488  *
3489  * @rdev: radeon_device pointer
3490  * @ring: radeon_ring structure holding ring information
3491  *
3492  * Allocate a scratch register and write to it using the gfx ring (CIK).
3493  * Provides a basic gfx ring test to verify that the ring is working.
3494  * Used by cik_cp_gfx_resume();
3495  * Returns 0 on success, error on failure.
3496  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* Grab a free CP scratch register to use as the test target. */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed the register with a known value so we can detect the CP write. */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Ask the CP to write 0xDEADBEEF to the scratch register.  The
	 * SET_UCONFIG_REG packet takes a dword offset relative to the
	 * UCONFIG register space base.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* Poll until the CP has executed the packet or we time out. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3537 
3538 /**
3539  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3540  *
3541  * @rdev: radeon_device pointer
3542  * @ridx: radeon ring index
3543  *
3544  * Emits an hdp flush on the cp.
3545  */
3546 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3547 				       int ridx)
3548 {
3549 	struct radeon_ring *ring = &rdev->ring[ridx];
3550 	u32 ref_and_mask;
3551 
3552 	switch (ring->idx) {
3553 	case CAYMAN_RING_TYPE_CP1_INDEX:
3554 	case CAYMAN_RING_TYPE_CP2_INDEX:
3555 	default:
3556 		switch (ring->me) {
3557 		case 0:
3558 			ref_and_mask = CP2 << ring->pipe;
3559 			break;
3560 		case 1:
3561 			ref_and_mask = CP6 << ring->pipe;
3562 			break;
3563 		default:
3564 			return;
3565 		}
3566 		break;
3567 	case RADEON_RING_TYPE_GFX_INDEX:
3568 		ref_and_mask = CP0;
3569 		break;
3570 	}
3571 
3572 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3573 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3574 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3575 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3576 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3577 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3578 	radeon_ring_write(ring, ref_and_mask);
3579 	radeon_ring_write(ring, ref_and_mask);
3580 	radeon_ring_write(ring, 0x20); /* poll interval */
3581 }
3582 
3583 /**
3584  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3585  *
3586  * @rdev: radeon_device pointer
3587  * @fence: radeon fence object
3588  *
 * Emits a fence sequence number on the gfx ring and flushes
3590  * GPU caches.
3591  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* 64-bit fence address, dword aligned; high bits limited to 16 */
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1) = write the 32-bit seq value, INT_SEL(0) = no interrupt
	 * for the dummy event
	 */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2): raise an interrupt when the seq write completes */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3623 
3624 /**
3625  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3626  *
3627  * @rdev: radeon_device pointer
3628  * @fence: radeon fence object
3629  *
 * Emits a fence sequence number on the compute ring and flushes
3631  * GPU caches.
3632  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int.  Compute queues use
	 * RELEASE_MEM rather than EVENT_WRITE_EOP; note the packet puts
	 * the data/int select dword *before* the address, unlike the gfx
	 * EOP packet in cik_fence_gfx_ring_emit().
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1) = write 32-bit seq, INT_SEL(2) = interrupt on write */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3651 
3652 /**
3653  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3654  *
3655  * @rdev: radeon_device pointer
3656  * @ring: radeon ring buffer object
3657  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3659  *
3660  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3661  * from running ahead of semaphore waits.
3662  */
3663 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3664 			     struct radeon_ring *ring,
3665 			     struct radeon_semaphore *semaphore,
3666 			     bool emit_wait)
3667 {
3668 	uint64_t addr = semaphore->gpu_addr;
3669 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3670 
3671 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3672 	radeon_ring_write(ring, lower_32_bits(addr));
3673 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3674 
3675 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3676 		/* Prevent the PFP from running ahead of the semaphore wait */
3677 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3678 		radeon_ring_write(ring, 0x0);
3679 	}
3680 
3681 	return true;
3682 }
3683 
3684 /**
3685  * cik_copy_cpdma - copy pages using the CP DMA engine
3686  *
3687  * @rdev: radeon_device pointer
3688  * @src_offset: src GPU address
3689  * @dst_offset: dst GPU address
3690  * @num_gpu_pages: number of GPU pages to xfer
3691  * @resv: reservation object to sync to
3692  *
3693  * Copy GPU paging using the CP DMA engine (CIK+).
3694  * Used by the radeon ttm implementation to move pages if
3695  * registered as the asic copy callback.
3696  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	/* CP DMA copies are registered as the "blit" copy method */
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* split the copy into chunks of at most 0x1fffff bytes — the
	 * byte-count limit of a single DMA_DATA packet
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus headroom for sync/fence packets */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait for the fences in @resv before starting the copy */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* make the CP wait for the final chunk to complete */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		/* roll back everything written since radeon_ring_lock() */
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3755 
3756 /*
3757  * IB stuff
3758  */
3759 /**
3760  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3761  *
3762  * @rdev: radeon_device pointer
3763  * @ib: radeon indirect buffer object
3764  *
3765  * Emits a DE (drawing engine) or CE (constant engine) IB
3766  * on the gfx ring.  IBs are usually generated by userspace
3767  * acceleration drivers and submitted to the kernel for
3768  * scheduling on the ring.  This function schedules the IB
3769  * on the gfx ring for execution by the GPU.
3770  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* wptr + 3 (this SET_UCONFIG_REG packet) + 4 (the IB
			 * packet below) = ring position after this IB is
			 * consumed; saved for debugging/lockup detection
			 */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* same idea, but record next_rptr via a memory write
			 * to the writeback buffer (5-dword WRITE_DATA packet)
			 */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB size in dwords plus the VM id the IB executes under */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3810 
3811 /**
3812  * cik_ib_test - basic gfx ring IB test
3813  *
3814  * @rdev: radeon_device pointer
3815  * @ring: radeon_ring structure holding ring information
3816  *
3817  * Allocate an IB and execute it on the gfx ring (CIK).
3818  * Provides a basic gfx ring test to verify that IBs are working.
3819  * Returns 0 on success, error on failure.
3820  */
3821 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3822 {
3823 	struct radeon_ib ib;
3824 	uint32_t scratch;
3825 	uint32_t tmp = 0;
3826 	unsigned i;
3827 	int r;
3828 
3829 	r = radeon_scratch_get(rdev, &scratch);
3830 	if (r) {
3831 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3832 		return r;
3833 	}
3834 	WREG32(scratch, 0xCAFEDEAD);
3835 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3836 	if (r) {
3837 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3838 		radeon_scratch_free(rdev, scratch);
3839 		return r;
3840 	}
3841 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3842 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3843 	ib.ptr[2] = 0xDEADBEEF;
3844 	ib.length_dw = 3;
3845 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3846 	if (r) {
3847 		radeon_scratch_free(rdev, scratch);
3848 		radeon_ib_free(rdev, &ib);
3849 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3850 		return r;
3851 	}
3852 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3853 		RADEON_USEC_IB_TEST_TIMEOUT));
3854 	if (r < 0) {
3855 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3856 		radeon_scratch_free(rdev, scratch);
3857 		radeon_ib_free(rdev, &ib);
3858 		return r;
3859 	} else if (r == 0) {
3860 		DRM_ERROR("radeon: fence wait timed out.\n");
3861 		radeon_scratch_free(rdev, scratch);
3862 		radeon_ib_free(rdev, &ib);
3863 		return -ETIMEDOUT;
3864 	}
3865 	r = 0;
3866 	for (i = 0; i < rdev->usec_timeout; i++) {
3867 		tmp = RREG32(scratch);
3868 		if (tmp == 0xDEADBEEF)
3869 			break;
3870 		DRM_UDELAY(1);
3871 	}
3872 	if (i < rdev->usec_timeout) {
3873 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3874 	} else {
3875 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3876 			  scratch, tmp);
3877 		r = -EINVAL;
3878 	}
3879 	radeon_scratch_free(rdev, scratch);
3880 	radeon_ib_free(rdev, &ib);
3881 	return r;
3882 }
3883 
3884 /*
3885  * CP.
 * On CIK, gfx and compute now have independent command processors.
3887  *
3888  * GFX
3889  * Gfx consists of a single ring and can process both gfx jobs and
3890  * compute jobs.  The gfx CP consists of three microengines (ME):
3891  * PFP - Pre-Fetch Parser
3892  * ME - Micro Engine
3893  * CE - Constant Engine
3894  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3896  * used by the DE so that they can be loaded into cache in parallel
3897  * while the DE is processing state update packets.
3898  *
3899  * Compute
3900  * The compute CP consists of two microengines (ME):
3901  * MEC1 - Compute MicroEngine 1
3902  * MEC2 - Compute MicroEngine 2
3903  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3904  * The queues are exposed to userspace and are programmed directly
3905  * by the compute runtime.
3906  */
3907 /**
3908  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3909  *
3910  * @rdev: radeon_device pointer
3911  * @enable: enable or disable the MEs
3912  *
3913  * Halts or unhalts the gfx MEs.
3914  */
3915 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3916 {
3917 	if (enable)
3918 		WREG32(CP_ME_CNTL, 0);
3919 	else {
3920 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3921 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3922 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3923 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3924 	}
3925 	udelay(50);
3926 }
3927 
3928 /**
3929  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3930  *
3931  * @rdev: radeon_device pointer
3932  *
3933  * Loads the gfx PFP, ME, and CE ucode.
3934  * Returns 0 for success, -EINVAL if the ucode is not available.
3935  */
3936 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3937 {
3938 	int i;
3939 
3940 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3941 		return -EINVAL;
3942 
3943 	cik_cp_gfx_enable(rdev, false);
3944 
3945 	if (rdev->new_fw) {
3946 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3947 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3948 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3949 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3950 		const struct gfx_firmware_header_v1_0 *me_hdr =
3951 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3952 		const __le32 *fw_data;
3953 		u32 fw_size;
3954 
3955 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3956 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3957 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3958 
3959 		/* PFP */
3960 		fw_data = (const __le32 *)
3961 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3962 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3963 		WREG32(CP_PFP_UCODE_ADDR, 0);
3964 		for (i = 0; i < fw_size; i++)
3965 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3966 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3967 
3968 		/* CE */
3969 		fw_data = (const __le32 *)
3970 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3971 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3972 		WREG32(CP_CE_UCODE_ADDR, 0);
3973 		for (i = 0; i < fw_size; i++)
3974 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3975 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3976 
3977 		/* ME */
3978 		fw_data = (const __be32 *)
3979 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3980 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3981 		WREG32(CP_ME_RAM_WADDR, 0);
3982 		for (i = 0; i < fw_size; i++)
3983 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3984 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3985 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3986 	} else {
3987 		const __be32 *fw_data;
3988 
3989 		/* PFP */
3990 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3991 		WREG32(CP_PFP_UCODE_ADDR, 0);
3992 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3993 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3994 		WREG32(CP_PFP_UCODE_ADDR, 0);
3995 
3996 		/* CE */
3997 		fw_data = (const __be32 *)rdev->ce_fw->data;
3998 		WREG32(CP_CE_UCODE_ADDR, 0);
3999 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4000 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4001 		WREG32(CP_CE_UCODE_ADDR, 0);
4002 
4003 		/* ME */
4004 		fw_data = (const __be32 *)rdev->me_fw->data;
4005 		WREG32(CP_ME_RAM_WADDR, 0);
4006 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4007 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4008 		WREG32(CP_ME_RAM_WADDR, 0);
4009 	}
4010 
4011 	return 0;
4012 }
4013 
4014 /**
4015  * cik_cp_gfx_start - start the gfx ring
4016  *
4017  * @rdev: radeon_device pointer
4018  *
4019  * Enables the ring and loads the clear state context and other
4020  * packets required to init the ring.
4021  * Returns 0 for success, error for failure.
4022  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear-state dwords plus 17 dwords of fixed setup packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the golden register state from clearstate_ci.h */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4074 
4075 /**
4076  * cik_cp_gfx_fini - stop the gfx ring
4077  *
4078  * @rdev: radeon_device pointer
4079  *
4080  * Stop the gfx ring and tear down the driver ring
4081  * info.
4082  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx MEs first, then release the ring buffer */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4088 
4089 /**
4090  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4091  *
4092  * @rdev: radeon_device pointer
4093  *
4094  * Program the location and size of the gfx ring buffer
4095  * and test it to make sure it's working.
4096  * Returns 0 for success, error for failure.
4097  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback the CP must not push rptr updates to memory */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	/* let the RPTR_WR_ENA write settle before restoring the final CNTL */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base address, 256-byte aligned */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* copy engine is back: expose all of VRAM again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4164 
4165 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4166 		     struct radeon_ring *ring)
4167 {
4168 	u32 rptr;
4169 
4170 	if (rdev->wb.enabled)
4171 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4172 	else
4173 		rptr = RREG32(CP_RB0_RPTR);
4174 
4175 	return rptr;
4176 }
4177 
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
{
	/* the gfx write pointer is always read directly from the register */
	return RREG32(CP_RB0_WPTR);
}
4183 
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to make sure the write reached the hardware before we
	 * return (flushes the posted MMIO write)
	 */
	(void)RREG32(CP_RB0_WPTR);
}
4190 
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		/* fast path: CPU-visible writeback copy */
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* CP_HQD_* registers are banked per queue; select this
		 * ring's me/pipe/queue under srbm_mutex before reading,
		 * then restore the default selection.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4208 
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* banked HQD register: select this queue under srbm_mutex,
		 * read, then restore the default selection
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4227 
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* publish the new wptr in the writeback buffer, then ring the
	 * queue's doorbell to notify the CP
	 */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4235 
/* Quiesce one compute queue's HQD.  Caller must hold rdev->srbm_mutex
 * (see cik_cp_compute_enable()), since this selects the queue's banked
 * register set via cik_srbm_select().
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* request dequeue and wait (bounded) for the queue to drain */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore default register bank selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4260 
4261 /**
4262  * cik_cp_compute_enable - enable/disable the compute CP MEs
4263  *
4264  * @rdev: radeon_device pointer
4265  * @enable: enable or disable the MEs
4266  *
4267  * Halts or unhalts the compute MEs.
4268  */
4269 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4270 {
4271 	if (enable)
4272 		WREG32(CP_MEC_CNTL, 0);
4273 	else {
4274 		/*
4275 		 * To make hibernation reliable we need to clear compute ring
4276 		 * configuration before halting the compute ring.
4277 		 */
4278 		mutex_lock(&rdev->srbm_mutex);
4279 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4280 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4281 		mutex_unlock(&rdev->srbm_mutex);
4282 
4283 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4284 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4285 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4286 	}
4287 	udelay(50);
4288 }
4289 
4290 /**
4291  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4292  *
4293  * @rdev: radeon_device pointer
4294  *
4295  * Loads the compute MEC1&2 ucode.
4296  * Returns 0 for success, -EINVAL if the ucode is not available.
4297  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* MECs must be halted while their ucode is rewritten */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian image behind a header */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		/* only Kaveri has a second MEC */
		if (rdev->family == CHIP_KAVERI) {
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy headerless firmware: big-endian image */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 */
			/* legacy firmware has a single mec image used for
			 * both MECs (no separate mec2 file)
			 */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4360 
4361 /**
4362  * cik_cp_compute_start - start the compute queues
4363  *
4364  * @rdev: radeon_device pointer
4365  *
4366  * Enable the compute queues.
4367  * Returns 0 for success, error for failure.
4368  */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* simply unhalt the MECs; queue setup is done elsewhere */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4375 
4376 /**
4377  * cik_cp_compute_fini - stop the compute queues
4378  *
4379  * @rdev: radeon_device pointer
4380  *
4381  * Stop the compute queues and tear down the driver queue
4382  * info.
4383  */
4384 static void cik_cp_compute_fini(struct radeon_device *rdev)
4385 {
4386 	int i, idx, r;
4387 
4388 	cik_cp_compute_enable(rdev, false);
4389 
4390 	for (i = 0; i < 2; i++) {
4391 		if (i == 0)
4392 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4393 		else
4394 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4395 
4396 		if (rdev->ring[idx].mqd_obj) {
4397 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4398 			if (unlikely(r != 0))
4399 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4400 
4401 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4402 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4403 
4404 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4405 			rdev->ring[idx].mqd_obj = NULL;
4406 		}
4407 	}
4408 }
4409 
/* Free the MEC HPD EOP buffer object allocated by cik_mec_init(). */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4425 
4426 #define MEC_HPD_SIZE 2048
4427 
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* allocate the HPD EOP buffer in GTT (2 * MEC_HPD_SIZE per pipe) */
	/* NOTE(review): the messages below say "HDP EOP" but this is the
	 * HPD EOP buffer (rdev->mec.hpd_eop_obj) — likely a typo in the
	 * message text.
	 */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	/* pin it and record the GPU address used when programming the HQDs */
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4482 
/*
 * Shadow copy of the per-queue CP_HQD_*/CP_MQD_* register state.
 *
 * This struct is embedded in struct bonaire_mqd, which is overlaid
 * directly onto the kmapped MQD buffer object in
 * cik_cp_compute_resume() — the layout appears to be dictated by
 * what the CP microcode expects, so do not reorder or resize fields
 * (TODO: confirm against the CIK MQD layout documentation).
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4521 
/*
 * Memory Queue Descriptor (MQD) for Bonaire-class compute queues.
 *
 * An instance of this struct is the in-memory image of a compute
 * queue: cik_cp_compute_resume() casts the kmapped MQD buffer object
 * to this type, fills it in, and points CP_MQD_BASE_ADDR at it.
 * The field layout is therefore consumed by the hardware/microcode —
 * do not reorder or resize fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* snapshot of the queue's CP_HQD_*/CP_MQD_* registers */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4549 
4550 /**
4551  * cik_cp_compute_resume - setup the compute queue registers
4552  *
4553  * @rdev: radeon_device pointer
4554  *
4555  * Program the compute queues and test them to make sure they
4556  * are working.
4557  * Returns 0 for success, error for failure.
4558  */
4559 static int cik_cp_compute_resume(struct radeon_device *rdev)
4560 {
4561 	int r, i, j, idx;
4562 	u32 tmp;
4563 	bool use_doorbell = true;
4564 	u64 hqd_gpu_addr;
4565 	u64 mqd_gpu_addr;
4566 	u64 eop_gpu_addr;
4567 	u64 wb_gpu_addr;
4568 	u32 *buf;
4569 	struct bonaire_mqd *mqd;
4570 
4571 	r = cik_cp_compute_start(rdev);
4572 	if (r)
4573 		return r;
4574 
4575 	/* fix up chicken bits */
4576 	tmp = RREG32(CP_CPF_DEBUG);
4577 	tmp |= (1 << 23);
4578 	WREG32(CP_CPF_DEBUG, tmp);
4579 
4580 	/* init the pipes */
4581 	mutex_lock(&rdev->srbm_mutex);
4582 
4583 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4584 
4585 	cik_srbm_select(rdev, 0, 0, 0, 0);
4586 
4587 	/* write the EOP addr */
4588 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4589 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4590 
4591 	/* set the VMID assigned */
4592 	WREG32(CP_HPD_EOP_VMID, 0);
4593 
4594 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4595 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4596 	tmp &= ~EOP_SIZE_MASK;
4597 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4598 	WREG32(CP_HPD_EOP_CONTROL, tmp);
4599 
4600 	mutex_unlock(&rdev->srbm_mutex);
4601 
4602 	/* init the queues.  Just two for now. */
4603 	for (i = 0; i < 2; i++) {
4604 		if (i == 0)
4605 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4606 		else
4607 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4608 
4609 		if (rdev->ring[idx].mqd_obj == NULL) {
4610 			r = radeon_bo_create(rdev,
4611 					     sizeof(struct bonaire_mqd),
4612 					     PAGE_SIZE, true,
4613 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4614 					     NULL, &rdev->ring[idx].mqd_obj);
4615 			if (r) {
4616 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4617 				return r;
4618 			}
4619 		}
4620 
4621 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4622 		if (unlikely(r != 0)) {
4623 			cik_cp_compute_fini(rdev);
4624 			return r;
4625 		}
4626 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4627 				  &mqd_gpu_addr);
4628 		if (r) {
4629 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4630 			cik_cp_compute_fini(rdev);
4631 			return r;
4632 		}
4633 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4634 		if (r) {
4635 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4636 			cik_cp_compute_fini(rdev);
4637 			return r;
4638 		}
4639 
4640 		/* init the mqd struct */
4641 		memset(buf, 0, sizeof(struct bonaire_mqd));
4642 
4643 		mqd = (struct bonaire_mqd *)buf;
4644 		mqd->header = 0xC0310800;
4645 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4646 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4647 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4648 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4649 
4650 		mutex_lock(&rdev->srbm_mutex);
4651 		cik_srbm_select(rdev, rdev->ring[idx].me,
4652 				rdev->ring[idx].pipe,
4653 				rdev->ring[idx].queue, 0);
4654 
4655 		/* disable wptr polling */
4656 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4657 		tmp &= ~WPTR_POLL_EN;
4658 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4659 
4660 		/* enable doorbell? */
4661 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4662 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4663 		if (use_doorbell)
4664 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4665 		else
4666 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4667 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4668 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4669 
4670 		/* disable the queue if it's active */
4671 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4672 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4673 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4674 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4675 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4676 			for (j = 0; j < rdev->usec_timeout; j++) {
4677 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4678 					break;
4679 				udelay(1);
4680 			}
4681 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4682 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4683 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4684 		}
4685 
4686 		/* set the pointer to the MQD */
4687 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4688 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4689 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4690 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4691 		/* set MQD vmid to 0 */
4692 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4693 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4694 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4695 
4696 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4697 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4698 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4699 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4700 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4701 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4702 
4703 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4704 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4705 		mqd->queue_state.cp_hqd_pq_control &=
4706 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4707 
4708 		mqd->queue_state.cp_hqd_pq_control |=
4709 			order_base_2(rdev->ring[idx].ring_size / 8);
4710 		mqd->queue_state.cp_hqd_pq_control |=
4711 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4712 #ifdef __BIG_ENDIAN
4713 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4714 #endif
4715 		mqd->queue_state.cp_hqd_pq_control &=
4716 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4717 		mqd->queue_state.cp_hqd_pq_control |=
4718 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4719 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4720 
4721 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4722 		if (i == 0)
4723 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4724 		else
4725 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4726 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4727 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4728 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4729 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4730 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4731 
4732 		/* set the wb address wether it's enabled or not */
4733 		if (i == 0)
4734 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4735 		else
4736 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4737 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4738 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4739 			upper_32_bits(wb_gpu_addr) & 0xffff;
4740 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4741 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4742 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4743 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4744 
4745 		/* enable the doorbell if requested */
4746 		if (use_doorbell) {
4747 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4748 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4749 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4750 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4751 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4752 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4753 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4754 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4755 
4756 		} else {
4757 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4758 		}
4759 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4760 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4761 
4762 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4763 		rdev->ring[idx].wptr = 0;
4764 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4765 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4766 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4767 
4768 		/* set the vmid for the queue */
4769 		mqd->queue_state.cp_hqd_vmid = 0;
4770 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4771 
4772 		/* activate the queue */
4773 		mqd->queue_state.cp_hqd_active = 1;
4774 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4775 
4776 		cik_srbm_select(rdev, 0, 0, 0, 0);
4777 		mutex_unlock(&rdev->srbm_mutex);
4778 
4779 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4780 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4781 
4782 		rdev->ring[idx].ready = true;
4783 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4784 		if (r)
4785 			rdev->ring[idx].ready = false;
4786 	}
4787 
4788 	return 0;
4789 }
4790 
/**
 * cik_cp_enable - enable/disable both command processors
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to disable
 *
 * Enable or disable the gfx CP and the compute CP (MEC) together.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4796 
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Load the gfx CP microcode first, then the compute CP (MEC)
 * microcode; stop at the first failure.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4810 
/**
 * cik_cp_fini - tear down both command processors
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the gfx CP and then the compute CP (MEC) state.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4816 
4817 static int cik_cp_resume(struct radeon_device *rdev)
4818 {
4819 	int r;
4820 
4821 	cik_enable_gui_idle_interrupt(rdev, false);
4822 
4823 	r = cik_cp_load_microcode(rdev);
4824 	if (r)
4825 		return r;
4826 
4827 	r = cik_cp_gfx_resume(rdev);
4828 	if (r)
4829 		return r;
4830 	r = cik_cp_compute_resume(rdev);
4831 	if (r)
4832 		return r;
4833 
4834 	cik_enable_gui_idle_interrupt(rdev, true);
4835 
4836 	return 0;
4837 }
4838 
/**
 * cik_print_gpu_status_regs - dump the GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dump the GRBM, SRBM, SDMA and CP status registers to the kernel
 * log.  Used as a diagnostic aid before and after a GPU soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4878 
4879 /**
4880  * cik_gpu_check_soft_reset - check which blocks are busy
4881  *
4882  * @rdev: radeon_device pointer
4883  *
4884  * Check which blocks are busy and return the relevant reset
4885  * mask to be used by cik_gpu_soft_reset().
4886  * Returns a mask of the blocks to be reset.
4887  */
4888 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4889 {
4890 	u32 reset_mask = 0;
4891 	u32 tmp;
4892 
4893 	/* GRBM_STATUS */
4894 	tmp = RREG32(GRBM_STATUS);
4895 	if (tmp & (PA_BUSY | SC_BUSY |
4896 		   BCI_BUSY | SX_BUSY |
4897 		   TA_BUSY | VGT_BUSY |
4898 		   DB_BUSY | CB_BUSY |
4899 		   GDS_BUSY | SPI_BUSY |
4900 		   IA_BUSY | IA_BUSY_NO_DMA))
4901 		reset_mask |= RADEON_RESET_GFX;
4902 
4903 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4904 		reset_mask |= RADEON_RESET_CP;
4905 
4906 	/* GRBM_STATUS2 */
4907 	tmp = RREG32(GRBM_STATUS2);
4908 	if (tmp & RLC_BUSY)
4909 		reset_mask |= RADEON_RESET_RLC;
4910 
4911 	/* SDMA0_STATUS_REG */
4912 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4913 	if (!(tmp & SDMA_IDLE))
4914 		reset_mask |= RADEON_RESET_DMA;
4915 
4916 	/* SDMA1_STATUS_REG */
4917 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4918 	if (!(tmp & SDMA_IDLE))
4919 		reset_mask |= RADEON_RESET_DMA1;
4920 
4921 	/* SRBM_STATUS2 */
4922 	tmp = RREG32(SRBM_STATUS2);
4923 	if (tmp & SDMA_BUSY)
4924 		reset_mask |= RADEON_RESET_DMA;
4925 
4926 	if (tmp & SDMA1_BUSY)
4927 		reset_mask |= RADEON_RESET_DMA1;
4928 
4929 	/* SRBM_STATUS */
4930 	tmp = RREG32(SRBM_STATUS);
4931 
4932 	if (tmp & IH_BUSY)
4933 		reset_mask |= RADEON_RESET_IH;
4934 
4935 	if (tmp & SEM_BUSY)
4936 		reset_mask |= RADEON_RESET_SEM;
4937 
4938 	if (tmp & GRBM_RQ_PENDING)
4939 		reset_mask |= RADEON_RESET_GRBM;
4940 
4941 	if (tmp & VMC_BUSY)
4942 		reset_mask |= RADEON_RESET_VMC;
4943 
4944 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4945 		   MCC_BUSY | MCD_BUSY))
4946 		reset_mask |= RADEON_RESET_MC;
4947 
4948 	if (evergreen_is_display_hung(rdev))
4949 		reset_mask |= RADEON_RESET_DISPLAY;
4950 
4951 	/* Skip MC reset as it's mostly likely not hung, just busy */
4952 	if (reset_mask & RADEON_RESET_MC) {
4953 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4954 		reset_mask &= ~RADEON_RESET_MC;
4955 	}
4956 
4957 	return reset_mask;
4958 }
4959 
4960 /**
4961  * cik_gpu_soft_reset - soft reset GPU
4962  *
4963  * @rdev: radeon_device pointer
4964  * @reset_mask: mask of which blocks to reset
4965  *
4966  * Soft reset the blocks specified in @reset_mask.
4967  */
4968 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4969 {
4970 	struct evergreen_mc_save save;
4971 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4972 	u32 tmp;
4973 
4974 	if (reset_mask == 0)
4975 		return;
4976 
4977 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4978 
4979 	cik_print_gpu_status_regs(rdev);
4980 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4981 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4982 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4983 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4984 
4985 	/* disable CG/PG */
4986 	cik_fini_pg(rdev);
4987 	cik_fini_cg(rdev);
4988 
4989 	/* stop the rlc */
4990 	cik_rlc_stop(rdev);
4991 
4992 	/* Disable GFX parsing/prefetching */
4993 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4994 
4995 	/* Disable MEC parsing/prefetching */
4996 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4997 
4998 	if (reset_mask & RADEON_RESET_DMA) {
4999 		/* sdma0 */
5000 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5001 		tmp |= SDMA_HALT;
5002 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5003 	}
5004 	if (reset_mask & RADEON_RESET_DMA1) {
5005 		/* sdma1 */
5006 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5007 		tmp |= SDMA_HALT;
5008 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5009 	}
5010 
5011 	evergreen_mc_stop(rdev, &save);
5012 	if (evergreen_mc_wait_for_idle(rdev)) {
5013 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5014 	}
5015 
5016 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5017 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5018 
5019 	if (reset_mask & RADEON_RESET_CP) {
5020 		grbm_soft_reset |= SOFT_RESET_CP;
5021 
5022 		srbm_soft_reset |= SOFT_RESET_GRBM;
5023 	}
5024 
5025 	if (reset_mask & RADEON_RESET_DMA)
5026 		srbm_soft_reset |= SOFT_RESET_SDMA;
5027 
5028 	if (reset_mask & RADEON_RESET_DMA1)
5029 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5030 
5031 	if (reset_mask & RADEON_RESET_DISPLAY)
5032 		srbm_soft_reset |= SOFT_RESET_DC;
5033 
5034 	if (reset_mask & RADEON_RESET_RLC)
5035 		grbm_soft_reset |= SOFT_RESET_RLC;
5036 
5037 	if (reset_mask & RADEON_RESET_SEM)
5038 		srbm_soft_reset |= SOFT_RESET_SEM;
5039 
5040 	if (reset_mask & RADEON_RESET_IH)
5041 		srbm_soft_reset |= SOFT_RESET_IH;
5042 
5043 	if (reset_mask & RADEON_RESET_GRBM)
5044 		srbm_soft_reset |= SOFT_RESET_GRBM;
5045 
5046 	if (reset_mask & RADEON_RESET_VMC)
5047 		srbm_soft_reset |= SOFT_RESET_VMC;
5048 
5049 	if (!(rdev->flags & RADEON_IS_IGP)) {
5050 		if (reset_mask & RADEON_RESET_MC)
5051 			srbm_soft_reset |= SOFT_RESET_MC;
5052 	}
5053 
5054 	if (grbm_soft_reset) {
5055 		tmp = RREG32(GRBM_SOFT_RESET);
5056 		tmp |= grbm_soft_reset;
5057 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5058 		WREG32(GRBM_SOFT_RESET, tmp);
5059 		tmp = RREG32(GRBM_SOFT_RESET);
5060 
5061 		udelay(50);
5062 
5063 		tmp &= ~grbm_soft_reset;
5064 		WREG32(GRBM_SOFT_RESET, tmp);
5065 		tmp = RREG32(GRBM_SOFT_RESET);
5066 	}
5067 
5068 	if (srbm_soft_reset) {
5069 		tmp = RREG32(SRBM_SOFT_RESET);
5070 		tmp |= srbm_soft_reset;
5071 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5072 		WREG32(SRBM_SOFT_RESET, tmp);
5073 		tmp = RREG32(SRBM_SOFT_RESET);
5074 
5075 		udelay(50);
5076 
5077 		tmp &= ~srbm_soft_reset;
5078 		WREG32(SRBM_SOFT_RESET, tmp);
5079 		tmp = RREG32(SRBM_SOFT_RESET);
5080 	}
5081 
5082 	/* Wait a little for things to settle down */
5083 	udelay(50);
5084 
5085 	evergreen_mc_resume(rdev, &save);
5086 	udelay(50);
5087 
5088 	cik_print_gpu_status_regs(rdev);
5089 }
5090 
/* GMCON register state saved across a KV/KB (IGP) PCI config reset */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5096 
/*
 * Save the GMCON registers and stop the memory-controller rengine
 * before an IGP PCI config reset; the saved state is put back by
 * kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* keep the rengine from running and disable stutter mode while reset */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5108 
5109 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5110 				      struct kv_reset_save_regs *save)
5111 {
5112 	int i;
5113 
5114 	WREG32(GMCON_PGFSM_WRITE, 0);
5115 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5116 
5117 	for (i = 0; i < 5; i++)
5118 		WREG32(GMCON_PGFSM_WRITE, 0);
5119 
5120 	WREG32(GMCON_PGFSM_WRITE, 0);
5121 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5122 
5123 	for (i = 0; i < 5; i++)
5124 		WREG32(GMCON_PGFSM_WRITE, 0);
5125 
5126 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5127 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5128 
5129 	for (i = 0; i < 5; i++)
5130 		WREG32(GMCON_PGFSM_WRITE, 0);
5131 
5132 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5133 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5134 
5135 	for (i = 0; i < 5; i++)
5136 		WREG32(GMCON_PGFSM_WRITE, 0);
5137 
5138 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5139 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5140 
5141 	for (i = 0; i < 5; i++)
5142 		WREG32(GMCON_PGFSM_WRITE, 0);
5143 
5144 	WREG32(GMCON_PGFSM_WRITE, 0);
5145 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5146 
5147 	for (i = 0; i < 5; i++)
5148 		WREG32(GMCON_PGFSM_WRITE, 0);
5149 
5150 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5151 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5152 
5153 	for (i = 0; i < 5; i++)
5154 		WREG32(GMCON_PGFSM_WRITE, 0);
5155 
5156 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5157 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5158 
5159 	for (i = 0; i < 5; i++)
5160 		WREG32(GMCON_PGFSM_WRITE, 0);
5161 
5162 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5163 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5164 
5165 	for (i = 0; i < 5; i++)
5166 		WREG32(GMCON_PGFSM_WRITE, 0);
5167 
5168 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5169 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5170 
5171 	for (i = 0; i < 5; i++)
5172 		WREG32(GMCON_PGFSM_WRITE, 0);
5173 
5174 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5175 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5176 
5177 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5178 	WREG32(GMCON_MISC, save->gmcon_misc);
5179 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5180 }
5181 
/**
 * cik_gpu_pci_config_reset - reset the asic via PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Halt all engines, quiesce the memory controller, then reset the
 * whole asic through the PCI config space reset and wait for it to
 * come back.  On IGP parts the GMCON state is saved/restored around
 * the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGP parts lose GMCON state over the reset; save it */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads all-ones while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5244 
5245 /**
5246  * cik_asic_reset - soft reset GPU
5247  *
5248  * @rdev: radeon_device pointer
5249  * @hard: force hard reset
5250  *
5251  * Look up which blocks are hung and attempt
5252  * to reset them.
5253  * Returns 0 for success.
5254  */
5255 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5256 {
5257 	u32 reset_mask;
5258 
5259 	if (hard) {
5260 		cik_gpu_pci_config_reset(rdev);
5261 		return 0;
5262 	}
5263 
5264 	reset_mask = cik_gpu_check_soft_reset(rdev);
5265 
5266 	if (reset_mask)
5267 		r600_set_bios_scratch_engine_hung(rdev, true);
5268 
5269 	/* try soft reset */
5270 	cik_gpu_soft_reset(rdev, reset_mask);
5271 
5272 	reset_mask = cik_gpu_check_soft_reset(rdev);
5273 
5274 	/* try pci config reset */
5275 	if (reset_mask && radeon_hard_reset)
5276 		cik_gpu_pci_config_reset(rdev);
5277 
5278 	reset_mask = cik_gpu_check_soft_reset(rdev);
5279 
5280 	if (!reset_mask)
5281 		r600_set_bios_scratch_engine_hung(rdev, false);
5282 
5283 	return 0;
5284 }
5285 
5286 /**
5287  * cik_gfx_is_lockup - check if the 3D engine is locked up
5288  *
5289  * @rdev: radeon_device pointer
5290  * @ring: radeon_ring structure holding ring information
5291  *
5292  * Check if the 3D engine is locked up (CIK).
5293  * Returns true if the engine is locked, false if not.
5294  */
5295 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5296 {
5297 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5298 
5299 	if (!(reset_mask & (RADEON_RESET_GFX |
5300 			    RADEON_RESET_COMPUTE |
5301 			    RADEON_RESET_CP))) {
5302 		radeon_ring_lockup_update(rdev, ring);
5303 		return false;
5304 	}
5305 	return radeon_ring_test_lockup(rdev, ring);
5306 }
5307 
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* the MC must be idle while its apertures are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top 16 bits = end, bottom 16 bits = start (in 16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused: program an empty aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5364 
5365 /**
5366  * cik_mc_init - initialize the memory controller driver params
5367  *
5368  * @rdev: radeon_device pointer
5369  *
5370  * Look up the amount of vram, vram width, and decide how to place
5371  * vram and gart within the GPU's physical address space (CIK).
5372  * Returns 0 for success.
5373  */
5374 static int cik_mc_init(struct radeon_device *rdev)
5375 {
5376 	u32 tmp;
5377 	int chansize, numchan;
5378 
5379 	/* Get VRAM informations */
5380 	rdev->mc.vram_is_ddr = true;
5381 	tmp = RREG32(MC_ARB_RAMCFG);
5382 	if (tmp & CHANSIZE_MASK) {
5383 		chansize = 64;
5384 	} else {
5385 		chansize = 32;
5386 	}
5387 	tmp = RREG32(MC_SHARED_CHMAP);
5388 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5389 	case 0:
5390 	default:
5391 		numchan = 1;
5392 		break;
5393 	case 1:
5394 		numchan = 2;
5395 		break;
5396 	case 2:
5397 		numchan = 4;
5398 		break;
5399 	case 3:
5400 		numchan = 8;
5401 		break;
5402 	case 4:
5403 		numchan = 3;
5404 		break;
5405 	case 5:
5406 		numchan = 6;
5407 		break;
5408 	case 6:
5409 		numchan = 10;
5410 		break;
5411 	case 7:
5412 		numchan = 12;
5413 		break;
5414 	case 8:
5415 		numchan = 16;
5416 		break;
5417 	}
5418 	rdev->mc.vram_width = numchan * chansize;
5419 	/* Could aper size report 0 ? */
5420 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5421 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5422 	/* size in MB on si */
5423 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5424 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5425 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5426 	si_vram_gtt_location(rdev, &rdev->mc);
5427 	radeon_update_bandwidth_info(rdev);
5428 
5429 	return 0;
5430 }
5431 
5432 /*
5433  * GART
5434  * VMID 0 is the physical GPU addresses as used by the kernel.
5435  * VMIDs 1-15 are used for userspace clients and are handled
5436  * by the radeon vm/hsa code.
5437  */
5438 /**
5439  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5440  *
5441  * @rdev: radeon_device pointer
5442  *
5443  * Flush the TLB for the VMID 0 page table (CIK).
5444  */
5445 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5446 {
5447 	/* flush hdp cache */
5448 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5449 
5450 	/* bits 0-15 are the VM contexts0-15 */
5451 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5452 }
5453 
5454 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5455 {
5456 	int i;
5457 	uint32_t sh_mem_bases, sh_mem_config;
5458 
5459 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5460 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5461 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5462 
5463 	mutex_lock(&rdev->srbm_mutex);
5464 	for (i = 8; i < 16; i++) {
5465 		cik_srbm_select(rdev, 0, 0, 0, i);
5466 		/* CP and shaders */
5467 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5468 		WREG32(SH_MEM_APE1_BASE, 1);
5469 		WREG32(SH_MEM_APE1_LIMIT, 0);
5470 		WREG32(SH_MEM_BASES, sh_mem_bases);
5471 	}
5472 	cik_srbm_select(rdev, 0, 0, 0, 0);
5473 	mutex_unlock(&rdev->srbm_mutex);
5474 }
5475 
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: the kernel's page table, covering the GTT aperture */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* redirect context0 faults to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* VMIDs 1-7 and 8-15 live in two separate register banks; restore
	 * the page table bases saved by cik_pcie_gart_disable() */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	/* amdkfd VMIDs 8-15 get their own SH_MEM setup */
	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5598 
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VM page table bases so cik_pcie_gart_enable()
	 * can restore them after reset/resume */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5637 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disable the hardware
 * tables first, then free the table BO and the gart structures.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5651 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Returns 0 always.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5665 
5666 /*
5667  * vm
5668  * VMID 0 is the physical GPU addresses as used by the kernel.
5669  * VMIDs 1-15 are used for userspace clients and are handled
5670  * by the radeon vm/hsa code.
5671  */
5672 /**
5673  * cik_vm_init - cik vm init callback
5674  *
5675  * @rdev: radeon_device pointer
5676  *
5677  * Inits cik specific vm parameters (number of VMs, base of vram for
5678  * VMIDs 1-15) (CIK).
5679  * Returns 0 for success.
5680  */
5681 int cik_vm_init(struct radeon_device *rdev)
5682 {
5683 	/*
5684 	 * number of VMs
5685 	 * VMID 0 is reserved for System
5686 	 * radeon graphics/compute will use VMIDs 1-7
5687 	 * amdkfd will use VMIDs 8-15
5688 	 */
5689 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5690 	/* base offset of vram pages */
5691 	if (rdev->flags & RADEON_IS_IGP) {
5692 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5693 		tmp <<= 22;
5694 		rdev->vm_manager.vram_base_offset = tmp;
5695 	} else
5696 		rdev->vm_manager.vram_base_offset = 0;
5697 
5698 	return 0;
5699 }
5700 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Intentionally empty: cik_vm_init() allocates no resources.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5711 
5712 /**
5713  * cik_vm_decode_fault - print human readable fault info
5714  *
5715  * @rdev: radeon_device pointer
5716  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5717  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5718  *
5719  * Print human readable fault information (CIK).
5720  */
5721 static void cik_vm_decode_fault(struct radeon_device *rdev,
5722 				u32 status, u32 addr, u32 mc_client)
5723 {
5724 	u32 mc_id;
5725 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5726 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5727 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5728 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5729 
5730 	if (rdev->family == CHIP_HAWAII)
5731 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5732 	else
5733 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5734 
5735 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5736 	       protections, vmid, addr,
5737 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5738 	       block, mc_client, mc_id);
5739 }
5740 
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush packets on (GFX or compute)
 * @vm_id: VM id whose page table base is updated and TLB flushed
 * @pd_addr: new page directory base address for @vm_id
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP engine; compute writes via the ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update this VMID's page table base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* VMIDs 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5822 
5823 /*
5824  * RLC
5825  * The RLC is a multi-purpose microengine that handles a
5826  * variety of functions, the most important of which is
5827  * the interrupt controller.
5828  */
5829 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5830 					  bool enable)
5831 {
5832 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5833 
5834 	if (enable)
5835 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5836 	else
5837 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5838 	WREG32(CP_INT_CNTL_RING0, tmp);
5839 }
5840 
5841 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5842 {
5843 	u32 tmp;
5844 
5845 	tmp = RREG32(RLC_LB_CNTL);
5846 	if (enable)
5847 		tmp |= LOAD_BALANCE_ENABLE;
5848 	else
5849 		tmp &= ~LOAD_BALANCE_ENABLE;
5850 	WREG32(RLC_LB_CNTL, tmp);
5851 }
5852 
/* Poll until the RLC serdes masters are idle: first the per-SE/SH CU
 * masters (under grbm_idx_mutex, selecting each SE/SH in turn), then the
 * non-CU masters.  Each poll is bounded by rdev->usec_timeout.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode before dropping the lock */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5879 
5880 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5881 {
5882 	u32 tmp;
5883 
5884 	tmp = RREG32(RLC_CNTL);
5885 	if (tmp != rlc)
5886 		WREG32(RLC_CNTL, rlc);
5887 }
5888 
/* Disable the RLC (if it was enabled) and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * afterwards via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded) for the RLC GPM to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5912 
/* Ask the RLC to enter safe mode, then wait (bounded) for the GFX
 * power/clock status bits and for the RLC to acknowledge the request
 * (REQ clears once the message is accepted).
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5933 
5934 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5935 {
5936 	u32 tmp;
5937 
5938 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5939 	WREG32(RLC_GPR_REG2, tmp);
5940 }
5941 
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* drain the serdes masters before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5957 
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* small settle delay after enabling the RLC */
	udelay(50);
}
5973 
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program the load-balancing params with SE/SH broadcast selected */
	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);
	mutex_unlock(&rdev->grbm_idx_mutex);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: header describes offset/size/version */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: fixed, per-family ucode sizes, big-endian */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6065 
/* Enable/disable coarse-grain clock gating (CGCG/CGLS) for GFX. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while touching the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated dummy reads before disabling -- NOTE(review):
		 * presumably a settle delay required by the hw; confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only touch the register if the value actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6103 
/* Enable/disable medium-grain clock gating (MGCG) for GFX, including the
 * related CP/RLC memory light sleep and CGTS shader-memory gating.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while writing the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both override bits on to disable MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6187 
/* MC clock-gating control registers walked by cik_enable_mc_ls() and
 * cik_enable_mc_mgcg() below. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6200 
6201 static void cik_enable_mc_ls(struct radeon_device *rdev,
6202 			     bool enable)
6203 {
6204 	int i;
6205 	u32 orig, data;
6206 
6207 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6208 		orig = data = RREG32(mc_cg_registers[i]);
6209 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6210 			data |= MC_LS_ENABLE;
6211 		else
6212 			data &= ~MC_LS_ENABLE;
6213 		if (data != orig)
6214 			WREG32(mc_cg_registers[i], data);
6215 	}
6216 }
6217 
6218 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6219 			       bool enable)
6220 {
6221 	int i;
6222 	u32 orig, data;
6223 
6224 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6225 		orig = data = RREG32(mc_cg_registers[i]);
6226 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6227 			data |= MC_CG_ENABLE;
6228 		else
6229 			data &= ~MC_CG_ENABLE;
6230 		if (data != orig)
6231 			WREG32(mc_cg_registers[i], data);
6232 	}
6233 }
6234 
6235 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6236 				 bool enable)
6237 {
6238 	u32 orig, data;
6239 
6240 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6241 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6242 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6243 	} else {
6244 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6245 		data |= 0xff000000;
6246 		if (data != orig)
6247 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6248 
6249 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6250 		data |= 0xff000000;
6251 		if (data != orig)
6252 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6253 	}
6254 }
6255 
6256 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6257 				 bool enable)
6258 {
6259 	u32 orig, data;
6260 
6261 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6262 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6263 		data |= 0x100;
6264 		if (orig != data)
6265 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6266 
6267 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6268 		data |= 0x100;
6269 		if (orig != data)
6270 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6271 	} else {
6272 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6273 		data &= ~0x100;
6274 		if (orig != data)
6275 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6276 
6277 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6278 		data &= ~0x100;
6279 		if (orig != data)
6280 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6281 	}
6282 }
6283 
/* Toggle medium-grain clock gating for the UVD block. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately
		 * overwritten; the read may matter only for its hardware
		 * side effect - confirm before removing */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6309 
6310 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6311 			       bool enable)
6312 {
6313 	u32 orig, data;
6314 
6315 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6316 
6317 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6318 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6319 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6320 	else
6321 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6322 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6323 
6324 	if (orig != data)
6325 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6326 }
6327 
6328 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6329 				bool enable)
6330 {
6331 	u32 orig, data;
6332 
6333 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6334 
6335 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6336 		data &= ~CLOCK_GATING_DIS;
6337 	else
6338 		data |= CLOCK_GATING_DIS;
6339 
6340 	if (orig != data)
6341 		WREG32(HDP_HOST_PATH_CNTL, data);
6342 }
6343 
6344 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6345 			      bool enable)
6346 {
6347 	u32 orig, data;
6348 
6349 	orig = data = RREG32(HDP_MEM_POWER_LS);
6350 
6351 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6352 		data |= HDP_LS_ENABLE;
6353 	else
6354 		data &= ~HDP_LS_ENABLE;
6355 
6356 	if (orig != data)
6357 		WREG32(HDP_MEM_POWER_LS, data);
6358 }
6359 
/**
 * cik_update_cg - toggle clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: mask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clock gating for the selected blocks
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! MGCG before CGCG on enable, reversed on disable */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC clock gating is skipped on IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6407 
/* Enable clock gating: GFX first, then UVD internal CG (if present),
 * then the remaining blocks in one call. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6422 
/* Disable clock gating in the reverse order of cik_init_cg(). */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6433 
6434 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6435 					  bool enable)
6436 {
6437 	u32 data, orig;
6438 
6439 	orig = data = RREG32(RLC_PG_CNTL);
6440 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6441 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6442 	else
6443 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6444 	if (orig != data)
6445 		WREG32(RLC_PG_CNTL, data);
6446 }
6447 
6448 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6449 					  bool enable)
6450 {
6451 	u32 data, orig;
6452 
6453 	orig = data = RREG32(RLC_PG_CNTL);
6454 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6455 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6456 	else
6457 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6458 	if (orig != data)
6459 		WREG32(RLC_PG_CNTL, data);
6460 }
6461 
6462 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6463 {
6464 	u32 data, orig;
6465 
6466 	orig = data = RREG32(RLC_PG_CNTL);
6467 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6468 		data &= ~DISABLE_CP_PG;
6469 	else
6470 		data |= DISABLE_CP_PG;
6471 	if (orig != data)
6472 		WREG32(RLC_PG_CNTL, data);
6473 }
6474 
6475 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6476 {
6477 	u32 data, orig;
6478 
6479 	orig = data = RREG32(RLC_PG_CNTL);
6480 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6481 		data &= ~DISABLE_GDS_PG;
6482 	else
6483 		data |= DISABLE_GDS_PG;
6484 	if (orig != data)
6485 		WREG32(RLC_PG_CNTL, data);
6486 }
6487 
/* Size (dwords) and offsets of the CP jump tables inside the legacy
 * (non-new_fw) ucode images, used by cik_init_cp_pg_table(). */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6491 
/**
 * cik_init_cp_pg_table - populate the RLC's CP jump table buffer
 *
 * @rdev: radeon_device pointer
 *
 * Copies the jump tables out of the CE, PFP, ME and MEC microcode images
 * (plus MEC2 on Kaveri) into the rlc.cp_table buffer, packed back to back,
 * so the RLC can restore CP state when coming out of powergating.
 * Silently returns if the table buffer is not mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri carries a second compute microengine (MEC2) with its own table */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new-style firmware: per-image header gives the jump
			 * table location and size (little-endian fields) */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			/* le->cpu->le round trip: value is unchanged, the
			 * conversions keep the endian annotations consistent */
			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: big-endian words at fixed offsets,
			 * fixed table size */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6575 
6576 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6577 				bool enable)
6578 {
6579 	u32 data, orig;
6580 
6581 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6582 		orig = data = RREG32(RLC_PG_CNTL);
6583 		data |= GFX_PG_ENABLE;
6584 		if (orig != data)
6585 			WREG32(RLC_PG_CNTL, data);
6586 
6587 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6588 		data |= AUTO_PG_EN;
6589 		if (orig != data)
6590 			WREG32(RLC_AUTO_PG_CTRL, data);
6591 	} else {
6592 		orig = data = RREG32(RLC_PG_CNTL);
6593 		data &= ~GFX_PG_ENABLE;
6594 		if (orig != data)
6595 			WREG32(RLC_PG_CNTL, data);
6596 
6597 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6598 		data &= ~AUTO_PG_EN;
6599 		if (orig != data)
6600 			WREG32(RLC_AUTO_PG_CTRL, data);
6601 
6602 		data = RREG32(DB_RENDER_CONTROL);
6603 	}
6604 }
6605 
6606 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6607 {
6608 	u32 mask = 0, tmp, tmp1;
6609 	int i;
6610 
6611 	mutex_lock(&rdev->grbm_idx_mutex);
6612 	cik_select_se_sh(rdev, se, sh);
6613 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6614 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6615 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6616 	mutex_unlock(&rdev->grbm_idx_mutex);
6617 
6618 	tmp &= 0xffff0000;
6619 
6620 	tmp |= tmp1;
6621 	tmp >>= 16;
6622 
6623 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6624 		mask <<= 1;
6625 		mask |= 1;
6626 	}
6627 
6628 	return (~tmp) & mask;
6629 }
6630 
6631 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6632 {
6633 	u32 i, j, k, active_cu_number = 0;
6634 	u32 mask, counter, cu_bitmap;
6635 	u32 tmp = 0;
6636 
6637 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6638 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6639 			mask = 1;
6640 			cu_bitmap = 0;
6641 			counter = 0;
6642 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6643 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6644 					if (counter < 2)
6645 						cu_bitmap |= mask;
6646 					counter ++;
6647 				}
6648 				mask <<= 1;
6649 			}
6650 
6651 			active_cu_number += counter;
6652 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6653 		}
6654 	}
6655 
6656 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6657 
6658 	tmp = RREG32(RLC_MAX_PG_CU);
6659 	tmp &= ~MAX_PU_CU_MASK;
6660 	tmp |= MAX_PU_CU(active_cu_number);
6661 	WREG32(RLC_MAX_PG_CU, tmp);
6662 }
6663 
6664 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6665 				       bool enable)
6666 {
6667 	u32 data, orig;
6668 
6669 	orig = data = RREG32(RLC_PG_CNTL);
6670 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6671 		data |= STATIC_PER_CU_PG_ENABLE;
6672 	else
6673 		data &= ~STATIC_PER_CU_PG_ENABLE;
6674 	if (orig != data)
6675 		WREG32(RLC_PG_CNTL, data);
6676 }
6677 
6678 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6679 					bool enable)
6680 {
6681 	u32 data, orig;
6682 
6683 	orig = data = RREG32(RLC_PG_CNTL);
6684 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6685 		data |= DYN_PER_CU_PG_ENABLE;
6686 	else
6687 		data &= ~DYN_PER_CU_PG_ENABLE;
6688 	if (orig != data)
6689 		WREG32(RLC_PG_CNTL, data);
6690 }
6691 
6692 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6693 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6694 
/**
 * cik_init_gfx_cgpg - set up the RLC for GFX powergating
 *
 * @rdev: radeon_device pointer
 *
 * Programs the clear-state descriptor and save/restore list into the
 * RLC scratch space, points the RLC at the save/restore and CP-table
 * buffers, and configures the powergating delay/idle-poll parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state buffer: zero out all three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffers the RLC uses while restoring CP state (256-byte aligned) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	/* low byte of RLC_PG_DELAY_2 set to 3 */
	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* GRBM register save idle threshold */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6743 
/**
 * cik_update_gfx_pg - enable/disable all GFX powergating modes
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Toggles coarse-grain, static per-CU and dynamic per-CU GFX
 * powergating together (each helper also checks its own pg_flags bit).
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6750 
6751 u32 cik_get_csb_size(struct radeon_device *rdev)
6752 {
6753 	u32 count = 0;
6754 	const struct cs_section_def *sect = NULL;
6755 	const struct cs_extent_def *ext = NULL;
6756 
6757 	if (rdev->rlc.cs_data == NULL)
6758 		return 0;
6759 
6760 	/* begin clear state */
6761 	count += 2;
6762 	/* context control state */
6763 	count += 3;
6764 
6765 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6766 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6767 			if (sect->id == SECT_CONTEXT)
6768 				count += 2 + ext->reg_count;
6769 			else
6770 				return 0;
6771 		}
6772 	}
6773 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6774 	count += 4;
6775 	/* end clear state */
6776 	count += 2;
6777 	/* clear state */
6778 	count += 2;
6779 
6780 	return count;
6781 }
6782 
/**
 * cik_get_csb_buffer - fill the clear-state buffer with PM4 packets
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Emits the clear-state init sequence: preamble begin, context control,
 * the per-section context register values, the asic-specific
 * PA_SC_RASTER_CONFIG pair, preamble end and a CLEAR_STATE packet.
 * The dword count must match cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* reg_index is a dword address; 0xa000 is the
				 * context register space base */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context sections are supported; bail out */
				return;
			}
		}
	}

	/* per-asic raster config values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6847 
/**
 * cik_init_pg - initialize and enable powergating
 *
 * @rdev: radeon_device pointer
 *
 * Enables the powergating features selected in pg_flags: SCK slowdown
 * on power up/down, the GFX powergating setup (including CP and GDS
 * powergating when GFX PG is supported), the always-on CU mask, and
 * finally the GFX powergating modes themselves.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6862 
/**
 * cik_fini_pg - disable powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GFX powergating modes, then CP and GDS powergating
 * (mirror of cik_init_pg()).
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6873 
6874 /*
6875  * Interrupts
6876  * Starting with r6xx, interrupts are handled via a ring buffer.
6877  * Ring buffers are areas of GPU accessible memory that the GPU
6878  * writes interrupt vectors into and the host reads vectors out of.
6879  * There is a rptr (read pointer) that determines where the
6880  * host is currently reading, and a wptr (write pointer)
6881  * which determines where the GPU has written.  When the
6882  * pointers are equal, the ring is idle.  When the GPU
6883  * writes vectors to the ring buffer, it increments the
6884  * wptr.  When there is an interrupt, the host then starts
 * fetching vectors and processing them until the pointers are
6886  * equal again at which point it updates the rptr.
6887  */
6888 
6889 /**
6890  * cik_enable_interrupts - Enable the interrupt ring buffer
6891  *
6892  * @rdev: radeon_device pointer
6893  *
6894  * Enable the interrupt ring buffer (CIK).
6895  */
6896 static void cik_enable_interrupts(struct radeon_device *rdev)
6897 {
6898 	u32 ih_cntl = RREG32(IH_CNTL);
6899 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6900 
6901 	ih_cntl |= ENABLE_INTR;
6902 	ih_rb_cntl |= IH_RB_ENABLE;
6903 	WREG32(IH_CNTL, ih_cntl);
6904 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6905 	rdev->ih.enabled = true;
6906 }
6907 
6908 /**
6909  * cik_disable_interrupts - Disable the interrupt ring buffer
6910  *
6911  * @rdev: radeon_device pointer
6912  *
6913  * Disable the interrupt ring buffer (CIK).
6914  */
6915 static void cik_disable_interrupts(struct radeon_device *rdev)
6916 {
6917 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6918 	u32 ih_cntl = RREG32(IH_CNTL);
6919 
6920 	ih_rb_cntl &= ~IH_RB_ENABLE;
6921 	ih_cntl &= ~ENABLE_INTR;
6922 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6923 	WREG32(IH_CNTL, ih_cntl);
6924 	/* set rptr, wptr to 0 */
6925 	WREG32(IH_RB_RPTR, 0);
6926 	WREG32(IH_RB_WPTR, 0);
6927 	rdev->ih.enabled = false;
6928 	rdev->ih.rptr = 0;
6929 }
6930 
6931 /**
6932  * cik_disable_interrupt_state - Disable all interrupt sources
6933  *
6934  * @rdev: radeon_device pointer
6935  *
6936  * Clear all interrupt enable bits used by the driver (CIK).
6937  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: both MEs, all four pipes each */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep only the polarity bit on each pad */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7007 
7008 /**
7009  * cik_irq_init - init and enable the interrupt ring
7010  *
7011  * @rdev: radeon_device pointer
7012  *
7013  * Allocate a ring buffer for the interrupt controller,
7014  * enable the RLC, disable interrupts, enable the IH
7015  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7017  * Returns 0 for success, errors for failure.
7018  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7089 
7090 /**
7091  * cik_irq_set - enable/disable interrupt sources
7092  *
7093  * @rdev: radeon_device pointer
7094  *
7095  * Enable interrupt sources on the GPU (vblanks, hpd,
7096  * etc.) (CIK).
7097  * Returns 0 for success, errors for failure.
7098  */
7099 int cik_irq_set(struct radeon_device *rdev)
7100 {
7101 	u32 cp_int_cntl;
7102 	u32 cp_m1p0;
7103 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7104 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7105 	u32 grbm_int_cntl = 0;
7106 	u32 dma_cntl, dma_cntl1;
7107 
7108 	if (!rdev->irq.installed) {
7109 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7110 		return -EINVAL;
7111 	}
7112 	/* don't enable anything if the ih is disabled */
7113 	if (!rdev->ih.enabled) {
7114 		cik_disable_interrupts(rdev);
7115 		/* force the active interrupt state to all disabled */
7116 		cik_disable_interrupt_state(rdev);
7117 		return 0;
7118 	}
7119 
7120 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7121 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7122 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7123 
7124 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7125 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7126 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7127 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7128 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7129 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7130 
7131 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7132 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7133 
7134 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7135 
7136 	/* enable CP interrupts on all rings */
7137 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7138 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7139 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7140 	}
7141 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7142 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7143 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7144 		if (ring->me == 1) {
7145 			switch (ring->pipe) {
7146 			case 0:
7147 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7148 				break;
7149 			default:
7150 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7151 				break;
7152 			}
7153 		} else {
7154 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7155 		}
7156 	}
7157 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7158 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7159 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7160 		if (ring->me == 1) {
7161 			switch (ring->pipe) {
7162 			case 0:
7163 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7164 				break;
7165 			default:
7166 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7167 				break;
7168 			}
7169 		} else {
7170 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7171 		}
7172 	}
7173 
7174 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7175 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7176 		dma_cntl |= TRAP_ENABLE;
7177 	}
7178 
7179 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7180 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7181 		dma_cntl1 |= TRAP_ENABLE;
7182 	}
7183 
7184 	if (rdev->irq.crtc_vblank_int[0] ||
7185 	    atomic_read(&rdev->irq.pflip[0])) {
7186 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7187 		crtc1 |= VBLANK_INTERRUPT_MASK;
7188 	}
7189 	if (rdev->irq.crtc_vblank_int[1] ||
7190 	    atomic_read(&rdev->irq.pflip[1])) {
7191 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7192 		crtc2 |= VBLANK_INTERRUPT_MASK;
7193 	}
7194 	if (rdev->irq.crtc_vblank_int[2] ||
7195 	    atomic_read(&rdev->irq.pflip[2])) {
7196 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7197 		crtc3 |= VBLANK_INTERRUPT_MASK;
7198 	}
7199 	if (rdev->irq.crtc_vblank_int[3] ||
7200 	    atomic_read(&rdev->irq.pflip[3])) {
7201 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7202 		crtc4 |= VBLANK_INTERRUPT_MASK;
7203 	}
7204 	if (rdev->irq.crtc_vblank_int[4] ||
7205 	    atomic_read(&rdev->irq.pflip[4])) {
7206 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7207 		crtc5 |= VBLANK_INTERRUPT_MASK;
7208 	}
7209 	if (rdev->irq.crtc_vblank_int[5] ||
7210 	    atomic_read(&rdev->irq.pflip[5])) {
7211 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7212 		crtc6 |= VBLANK_INTERRUPT_MASK;
7213 	}
7214 	if (rdev->irq.hpd[0]) {
7215 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7216 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7217 	}
7218 	if (rdev->irq.hpd[1]) {
7219 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7220 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7221 	}
7222 	if (rdev->irq.hpd[2]) {
7223 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7224 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7225 	}
7226 	if (rdev->irq.hpd[3]) {
7227 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7228 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7229 	}
7230 	if (rdev->irq.hpd[4]) {
7231 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7232 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7233 	}
7234 	if (rdev->irq.hpd[5]) {
7235 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7236 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7237 	}
7238 
7239 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7240 
7241 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7242 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7243 
7244 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7245 
7246 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7247 
7248 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7249 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7250 	if (rdev->num_crtc >= 4) {
7251 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7252 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7253 	}
7254 	if (rdev->num_crtc >= 6) {
7255 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7256 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7257 	}
7258 
7259 	if (rdev->num_crtc >= 2) {
7260 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7261 		       GRPH_PFLIP_INT_MASK);
7262 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7263 		       GRPH_PFLIP_INT_MASK);
7264 	}
7265 	if (rdev->num_crtc >= 4) {
7266 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7267 		       GRPH_PFLIP_INT_MASK);
7268 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7269 		       GRPH_PFLIP_INT_MASK);
7270 	}
7271 	if (rdev->num_crtc >= 6) {
7272 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7273 		       GRPH_PFLIP_INT_MASK);
7274 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7275 		       GRPH_PFLIP_INT_MASK);
7276 	}
7277 
7278 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7279 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7280 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7281 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7282 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7283 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7284 
7285 	/* posting read */
7286 	RREG32(SRBM_STATUS);
7287 
7288 	return 0;
7289 }
7290 
7291 /**
7292  * cik_irq_ack - ack interrupt sources
7293  *
7294  * @rdev: radeon_device pointer
7295  *
7296  * Ack interrupt sources on the GPU (vblanks, hpd,
7297  * etc.) (CIK).  Certain interrupts sources are sw
7298  * generated and do not require an explicit ack.
7299  */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* latch all display interrupt status registers for the irq handler */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* ack pflip and vblank/vline interrupts on crtc 0/1 */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* same for crtc 2/3 when present */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* same for crtc 4/5 when present */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack hotplug connect/disconnect interrupts (read-modify-write
	 * to preserve the enable/polarity bits) */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	/* ack hotplug RX (DDC/short-pulse) interrupts */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
7439 
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq: give any in-flight interrupt time to
	 * land, then ack whatever is still pending before tearing down the
	 * per-source enable state.
	 */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7455 
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7469 
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw first (disables irqs and stops the RLC) */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7484 
/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK).  Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* prefer the writeback copy when enabled; avoids an MMIO read */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* ack the overflow so the hw can report the next one */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
7520 
7521 /*        CIK IV Ring
7522  * Each IV ring entry is 128 bits:
7523  * [7:0]    - interrupt source id
7524  * [31:8]   - reserved
7525  * [59:32]  - interrupt source data
7526  * [63:60]  - reserved
7527  * [71:64]  - RINGID
7528  *            CP:
7529  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7530  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7531  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7532  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7533  *            PIPE_ID - ME0 0=3D
7534  *                    - ME1&2 compute dispatcher (4 pipes each)
7535  *            SDMA:
7536  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7537  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7538  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7539  * [79:72]  - VMID
7540  * [95:80]  - PASID
7541  * [127:96] - reserved
7542  */
7543 /**
7544  * cik_irq_process - interrupt handler
7545  *
7546  * @rdev: radeon_device pointer
7547  *
7548  * Interrupt hander (CIK).  Walk the IH ring,
7549  * ack interrupts and schedule work to handle
7550  * interrupt events.
7551  * Returns irq process return code.
7552  */
7553 int cik_irq_process(struct radeon_device *rdev)
7554 {
7555 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7556 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7557 	u32 wptr;
7558 	u32 rptr;
7559 	u32 src_id, src_data, ring_id;
7560 	u8 me_id, pipe_id, queue_id;
7561 	u32 ring_index;
7562 	bool queue_hotplug = false;
7563 	bool queue_dp = false;
7564 	bool queue_reset = false;
7565 	u32 addr, status, mc_client;
7566 	bool queue_thermal = false;
7567 
7568 	if (!rdev->ih.enabled || rdev->shutdown)
7569 		return IRQ_NONE;
7570 
7571 	wptr = cik_get_ih_wptr(rdev);
7572 
7573 restart_ih:
7574 	/* is somebody else already processing irqs? */
7575 	if (atomic_xchg(&rdev->ih.lock, 1))
7576 		return IRQ_NONE;
7577 
7578 	rptr = rdev->ih.rptr;
7579 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7580 
7581 	/* Order reading of wptr vs. reading of IH ring data */
7582 	rmb();
7583 
7584 	/* display interrupts */
7585 	cik_irq_ack(rdev);
7586 
7587 	while (rptr != wptr) {
7588 		/* wptr/rptr are in bytes! */
7589 		ring_index = rptr / 4;
7590 
7591 		radeon_kfd_interrupt(rdev,
7592 				(const void *) &rdev->ih.ring[ring_index]);
7593 
7594 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7595 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7596 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7597 
7598 		switch (src_id) {
7599 		case 1: /* D1 vblank/vline */
7600 			switch (src_data) {
7601 			case 0: /* D1 vblank */
7602 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7603 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7604 
7605 				if (rdev->irq.crtc_vblank_int[0]) {
7606 					drm_handle_vblank(rdev->ddev, 0);
7607 					rdev->pm.vblank_sync = true;
7608 					wake_up(&rdev->irq.vblank_queue);
7609 				}
7610 				if (atomic_read(&rdev->irq.pflip[0]))
7611 					radeon_crtc_handle_vblank(rdev, 0);
7612 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7613 				DRM_DEBUG("IH: D1 vblank\n");
7614 
7615 				break;
7616 			case 1: /* D1 vline */
7617 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7618 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7619 
7620 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7621 				DRM_DEBUG("IH: D1 vline\n");
7622 
7623 				break;
7624 			default:
7625 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7626 				break;
7627 			}
7628 			break;
7629 		case 2: /* D2 vblank/vline */
7630 			switch (src_data) {
7631 			case 0: /* D2 vblank */
7632 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7633 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7634 
7635 				if (rdev->irq.crtc_vblank_int[1]) {
7636 					drm_handle_vblank(rdev->ddev, 1);
7637 					rdev->pm.vblank_sync = true;
7638 					wake_up(&rdev->irq.vblank_queue);
7639 				}
7640 				if (atomic_read(&rdev->irq.pflip[1]))
7641 					radeon_crtc_handle_vblank(rdev, 1);
7642 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7643 				DRM_DEBUG("IH: D2 vblank\n");
7644 
7645 				break;
7646 			case 1: /* D2 vline */
7647 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7648 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7649 
7650 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7651 				DRM_DEBUG("IH: D2 vline\n");
7652 
7653 				break;
7654 			default:
7655 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7656 				break;
7657 			}
7658 			break;
7659 		case 3: /* D3 vblank/vline */
7660 			switch (src_data) {
7661 			case 0: /* D3 vblank */
7662 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7663 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7664 
7665 				if (rdev->irq.crtc_vblank_int[2]) {
7666 					drm_handle_vblank(rdev->ddev, 2);
7667 					rdev->pm.vblank_sync = true;
7668 					wake_up(&rdev->irq.vblank_queue);
7669 				}
7670 				if (atomic_read(&rdev->irq.pflip[2]))
7671 					radeon_crtc_handle_vblank(rdev, 2);
7672 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7673 				DRM_DEBUG("IH: D3 vblank\n");
7674 
7675 				break;
7676 			case 1: /* D3 vline */
7677 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7678 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7679 
7680 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7681 				DRM_DEBUG("IH: D3 vline\n");
7682 
7683 				break;
7684 			default:
7685 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7686 				break;
7687 			}
7688 			break;
7689 		case 4: /* D4 vblank/vline */
7690 			switch (src_data) {
7691 			case 0: /* D4 vblank */
7692 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7693 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7694 
7695 				if (rdev->irq.crtc_vblank_int[3]) {
7696 					drm_handle_vblank(rdev->ddev, 3);
7697 					rdev->pm.vblank_sync = true;
7698 					wake_up(&rdev->irq.vblank_queue);
7699 				}
7700 				if (atomic_read(&rdev->irq.pflip[3]))
7701 					radeon_crtc_handle_vblank(rdev, 3);
7702 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7703 				DRM_DEBUG("IH: D4 vblank\n");
7704 
7705 				break;
7706 			case 1: /* D4 vline */
7707 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7708 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7709 
7710 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7711 				DRM_DEBUG("IH: D4 vline\n");
7712 
7713 				break;
7714 			default:
7715 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7716 				break;
7717 			}
7718 			break;
7719 		case 5: /* D5 vblank/vline */
7720 			switch (src_data) {
7721 			case 0: /* D5 vblank */
7722 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7723 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7724 
7725 				if (rdev->irq.crtc_vblank_int[4]) {
7726 					drm_handle_vblank(rdev->ddev, 4);
7727 					rdev->pm.vblank_sync = true;
7728 					wake_up(&rdev->irq.vblank_queue);
7729 				}
7730 				if (atomic_read(&rdev->irq.pflip[4]))
7731 					radeon_crtc_handle_vblank(rdev, 4);
7732 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7733 				DRM_DEBUG("IH: D5 vblank\n");
7734 
7735 				break;
7736 			case 1: /* D5 vline */
7737 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7738 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7739 
7740 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7741 				DRM_DEBUG("IH: D5 vline\n");
7742 
7743 				break;
7744 			default:
7745 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7746 				break;
7747 			}
7748 			break;
7749 		case 6: /* D6 vblank/vline */
7750 			switch (src_data) {
7751 			case 0: /* D6 vblank */
7752 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7753 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7754 
7755 				if (rdev->irq.crtc_vblank_int[5]) {
7756 					drm_handle_vblank(rdev->ddev, 5);
7757 					rdev->pm.vblank_sync = true;
7758 					wake_up(&rdev->irq.vblank_queue);
7759 				}
7760 				if (atomic_read(&rdev->irq.pflip[5]))
7761 					radeon_crtc_handle_vblank(rdev, 5);
7762 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7763 				DRM_DEBUG("IH: D6 vblank\n");
7764 
7765 				break;
7766 			case 1: /* D6 vline */
7767 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7768 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7769 
7770 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7771 				DRM_DEBUG("IH: D6 vline\n");
7772 
7773 				break;
7774 			default:
7775 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7776 				break;
7777 			}
7778 			break;
7779 		case 8: /* D1 page flip */
7780 		case 10: /* D2 page flip */
7781 		case 12: /* D3 page flip */
7782 		case 14: /* D4 page flip */
7783 		case 16: /* D5 page flip */
7784 		case 18: /* D6 page flip */
7785 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7786 			if (radeon_use_pflipirq > 0)
7787 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7788 			break;
7789 		case 42: /* HPD hotplug */
7790 			switch (src_data) {
7791 			case 0:
7792 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7793 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7794 
7795 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7796 				queue_hotplug = true;
7797 				DRM_DEBUG("IH: HPD1\n");
7798 
7799 				break;
7800 			case 1:
7801 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7802 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803 
7804 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7805 				queue_hotplug = true;
7806 				DRM_DEBUG("IH: HPD2\n");
7807 
7808 				break;
7809 			case 2:
7810 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7811 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812 
7813 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7814 				queue_hotplug = true;
7815 				DRM_DEBUG("IH: HPD3\n");
7816 
7817 				break;
7818 			case 3:
7819 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7820 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821 
7822 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7823 				queue_hotplug = true;
7824 				DRM_DEBUG("IH: HPD4\n");
7825 
7826 				break;
7827 			case 4:
7828 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7829 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830 
7831 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7832 				queue_hotplug = true;
7833 				DRM_DEBUG("IH: HPD5\n");
7834 
7835 				break;
7836 			case 5:
7837 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7838 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839 
7840 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7841 				queue_hotplug = true;
7842 				DRM_DEBUG("IH: HPD6\n");
7843 
7844 				break;
7845 			case 6:
7846 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7847 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848 
7849 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7850 				queue_dp = true;
7851 				DRM_DEBUG("IH: HPD_RX 1\n");
7852 
7853 				break;
7854 			case 7:
7855 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7856 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857 
7858 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7859 				queue_dp = true;
7860 				DRM_DEBUG("IH: HPD_RX 2\n");
7861 
7862 				break;
7863 			case 8:
7864 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7865 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866 
7867 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7868 				queue_dp = true;
7869 				DRM_DEBUG("IH: HPD_RX 3\n");
7870 
7871 				break;
7872 			case 9:
7873 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7874 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875 
7876 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7877 				queue_dp = true;
7878 				DRM_DEBUG("IH: HPD_RX 4\n");
7879 
7880 				break;
7881 			case 10:
7882 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7883 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7884 
7885 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7886 				queue_dp = true;
7887 				DRM_DEBUG("IH: HPD_RX 5\n");
7888 
7889 				break;
7890 			case 11:
7891 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7892 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7893 
7894 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7895 				queue_dp = true;
7896 				DRM_DEBUG("IH: HPD_RX 6\n");
7897 
7898 				break;
7899 			default:
7900 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7901 				break;
7902 			}
7903 			break;
7904 		case 96:
7905 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7906 			WREG32(SRBM_INT_ACK, 0x1);
7907 			break;
7908 		case 124: /* UVD */
7909 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7910 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7911 			break;
7912 		case 146:
7913 		case 147:
7914 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7915 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7916 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7917 			/* reset addr and status */
7918 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7919 			if (addr == 0x0 && status == 0x0)
7920 				break;
7921 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7922 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7923 				addr);
7924 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7925 				status);
7926 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7927 			break;
7928 		case 167: /* VCE */
7929 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7930 			switch (src_data) {
7931 			case 0:
7932 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7933 				break;
7934 			case 1:
7935 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7936 				break;
7937 			default:
7938 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7939 				break;
7940 			}
7941 			break;
7942 		case 176: /* GFX RB CP_INT */
7943 		case 177: /* GFX IB CP_INT */
7944 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7945 			break;
7946 		case 181: /* CP EOP event */
7947 			DRM_DEBUG("IH: CP EOP\n");
7948 			/* XXX check the bitfield order! */
7949 			me_id = (ring_id & 0x60) >> 5;
7950 			pipe_id = (ring_id & 0x18) >> 3;
7951 			queue_id = (ring_id & 0x7) >> 0;
7952 			switch (me_id) {
7953 			case 0:
7954 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7955 				break;
7956 			case 1:
7957 			case 2:
7958 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7959 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7960 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7961 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7962 				break;
7963 			}
7964 			break;
7965 		case 184: /* CP Privileged reg access */
7966 			DRM_ERROR("Illegal register access in command stream\n");
7967 			/* XXX check the bitfield order! */
7968 			me_id = (ring_id & 0x60) >> 5;
7969 			pipe_id = (ring_id & 0x18) >> 3;
7970 			queue_id = (ring_id & 0x7) >> 0;
7971 			switch (me_id) {
7972 			case 0:
7973 				/* This results in a full GPU reset, but all we need to do is soft
7974 				 * reset the CP for gfx
7975 				 */
7976 				queue_reset = true;
7977 				break;
7978 			case 1:
7979 				/* XXX compute */
7980 				queue_reset = true;
7981 				break;
7982 			case 2:
7983 				/* XXX compute */
7984 				queue_reset = true;
7985 				break;
7986 			}
7987 			break;
7988 		case 185: /* CP Privileged inst */
7989 			DRM_ERROR("Illegal instruction in command stream\n");
7990 			/* XXX check the bitfield order! */
7991 			me_id = (ring_id & 0x60) >> 5;
7992 			pipe_id = (ring_id & 0x18) >> 3;
7993 			queue_id = (ring_id & 0x7) >> 0;
7994 			switch (me_id) {
7995 			case 0:
7996 				/* This results in a full GPU reset, but all we need to do is soft
7997 				 * reset the CP for gfx
7998 				 */
7999 				queue_reset = true;
8000 				break;
8001 			case 1:
8002 				/* XXX compute */
8003 				queue_reset = true;
8004 				break;
8005 			case 2:
8006 				/* XXX compute */
8007 				queue_reset = true;
8008 				break;
8009 			}
8010 			break;
8011 		case 224: /* SDMA trap event */
8012 			/* XXX check the bitfield order! */
8013 			me_id = (ring_id & 0x3) >> 0;
8014 			queue_id = (ring_id & 0xc) >> 2;
8015 			DRM_DEBUG("IH: SDMA trap\n");
8016 			switch (me_id) {
8017 			case 0:
8018 				switch (queue_id) {
8019 				case 0:
8020 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8021 					break;
8022 				case 1:
8023 					/* XXX compute */
8024 					break;
8025 				case 2:
8026 					/* XXX compute */
8027 					break;
8028 				}
8029 				break;
8030 			case 1:
8031 				switch (queue_id) {
8032 				case 0:
8033 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8034 					break;
8035 				case 1:
8036 					/* XXX compute */
8037 					break;
8038 				case 2:
8039 					/* XXX compute */
8040 					break;
8041 				}
8042 				break;
8043 			}
8044 			break;
8045 		case 230: /* thermal low to high */
8046 			DRM_DEBUG("IH: thermal low to high\n");
8047 			rdev->pm.dpm.thermal.high_to_low = false;
8048 			queue_thermal = true;
8049 			break;
8050 		case 231: /* thermal high to low */
8051 			DRM_DEBUG("IH: thermal high to low\n");
8052 			rdev->pm.dpm.thermal.high_to_low = true;
8053 			queue_thermal = true;
8054 			break;
8055 		case 233: /* GUI IDLE */
8056 			DRM_DEBUG("IH: GUI idle\n");
8057 			break;
8058 		case 241: /* SDMA Privileged inst */
8059 		case 247: /* SDMA Privileged inst */
8060 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8061 			/* XXX check the bitfield order! */
8062 			me_id = (ring_id & 0x3) >> 0;
8063 			queue_id = (ring_id & 0xc) >> 2;
8064 			switch (me_id) {
8065 			case 0:
8066 				switch (queue_id) {
8067 				case 0:
8068 					queue_reset = true;
8069 					break;
8070 				case 1:
8071 					/* XXX compute */
8072 					queue_reset = true;
8073 					break;
8074 				case 2:
8075 					/* XXX compute */
8076 					queue_reset = true;
8077 					break;
8078 				}
8079 				break;
8080 			case 1:
8081 				switch (queue_id) {
8082 				case 0:
8083 					queue_reset = true;
8084 					break;
8085 				case 1:
8086 					/* XXX compute */
8087 					queue_reset = true;
8088 					break;
8089 				case 2:
8090 					/* XXX compute */
8091 					queue_reset = true;
8092 					break;
8093 				}
8094 				break;
8095 			}
8096 			break;
8097 		default:
8098 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8099 			break;
8100 		}
8101 
8102 		/* wptr/rptr are in bytes! */
8103 		rptr += 16;
8104 		rptr &= rdev->ih.ptr_mask;
8105 		WREG32(IH_RB_RPTR, rptr);
8106 	}
8107 	if (queue_dp)
8108 		schedule_work(&rdev->dp_work);
8109 	if (queue_hotplug)
8110 		schedule_delayed_work(&rdev->hotplug_work, 0);
8111 	if (queue_reset) {
8112 		rdev->needs_reset = true;
8113 		wake_up_all(&rdev->fence_queue);
8114 	}
8115 	if (queue_thermal)
8116 		schedule_work(&rdev->pm.dpm.thermal.work);
8117 	rdev->ih.rptr = rptr;
8118 	atomic_set(&rdev->ih.lock, 0);
8119 
8120 	/* make sure wptr hasn't changed while processing */
8121 	wptr = cik_get_ih_wptr(rdev);
8122 	if (wptr != rptr)
8123 		goto restart_ih;
8124 
8125 	return IRQ_HANDLED;
8126 }
8127 
8128 /*
8129  * startup/shutdown callbacks
8130  */
8131 static void cik_uvd_init(struct radeon_device *rdev)
8132 {
8133 	int r;
8134 
8135 	if (!rdev->has_uvd)
8136 		return;
8137 
8138 	r = radeon_uvd_init(rdev);
8139 	if (r) {
8140 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8141 		/*
8142 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8143 		 * to early fails cik_uvd_start() and thus nothing happens
8144 		 * there. So it is pointless to try to go through that code
8145 		 * hence why we disable uvd here.
8146 		 */
8147 		rdev->has_uvd = 0;
8148 		return;
8149 	}
8150 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8151 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8152 }
8153 
8154 static void cik_uvd_start(struct radeon_device *rdev)
8155 {
8156 	int r;
8157 
8158 	if (!rdev->has_uvd)
8159 		return;
8160 
8161 	r = radeon_uvd_resume(rdev);
8162 	if (r) {
8163 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8164 		goto error;
8165 	}
8166 	r = uvd_v4_2_resume(rdev);
8167 	if (r) {
8168 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8169 		goto error;
8170 	}
8171 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8172 	if (r) {
8173 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8174 		goto error;
8175 	}
8176 	return;
8177 
8178 error:
8179 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8180 }
8181 
8182 static void cik_uvd_resume(struct radeon_device *rdev)
8183 {
8184 	struct radeon_ring *ring;
8185 	int r;
8186 
8187 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8188 		return;
8189 
8190 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8191 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8192 	if (r) {
8193 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8194 		return;
8195 	}
8196 	r = uvd_v1_0_init(rdev);
8197 	if (r) {
8198 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8199 		return;
8200 	}
8201 }
8202 
8203 static void cik_vce_init(struct radeon_device *rdev)
8204 {
8205 	int r;
8206 
8207 	if (!rdev->has_vce)
8208 		return;
8209 
8210 	r = radeon_vce_init(rdev);
8211 	if (r) {
8212 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8213 		/*
8214 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8215 		 * to early fails cik_vce_start() and thus nothing happens
8216 		 * there. So it is pointless to try to go through that code
8217 		 * hence why we disable vce here.
8218 		 */
8219 		rdev->has_vce = 0;
8220 		return;
8221 	}
8222 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8223 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8224 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8225 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8226 }
8227 
8228 static void cik_vce_start(struct radeon_device *rdev)
8229 {
8230 	int r;
8231 
8232 	if (!rdev->has_vce)
8233 		return;
8234 
8235 	r = radeon_vce_resume(rdev);
8236 	if (r) {
8237 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8238 		goto error;
8239 	}
8240 	r = vce_v2_0_resume(rdev);
8241 	if (r) {
8242 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8243 		goto error;
8244 	}
8245 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8246 	if (r) {
8247 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8248 		goto error;
8249 	}
8250 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8251 	if (r) {
8252 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8253 		goto error;
8254 	}
8255 	return;
8256 
8257 error:
8258 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8259 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8260 }
8261 
8262 static void cik_vce_resume(struct radeon_device *rdev)
8263 {
8264 	struct radeon_ring *ring;
8265 	int r;
8266 
8267 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8268 		return;
8269 
8270 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8271 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8272 	if (r) {
8273 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8274 		return;
8275 	}
8276 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8277 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8278 	if (r) {
8279 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8280 		return;
8281 	}
8282 	r = vce_v1_0_init(rdev);
8283 	if (r) {
8284 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8285 		return;
8286 	}
8287 }
8288 
8289 /**
8290  * cik_startup - program the asic to a functional state
8291  *
8292  * @rdev: radeon_device pointer
8293  *
8294  * Programs the asic to a functional state (CIK).
8295  * Called by cik_init() and cik_resume().
8296  * Returns 0 for success, error for failure.
8297  */
8298 static int cik_startup(struct radeon_device *rdev)
8299 {
8300 	struct radeon_ring *ring;
8301 	u32 nop;
8302 	int r;
8303 
8304 	/* enable pcie gen2/3 link */
8305 	cik_pcie_gen3_enable(rdev);
8306 	/* enable aspm */
8307 	cik_program_aspm(rdev);
8308 
8309 	/* scratch needs to be initialized before MC */
8310 	r = r600_vram_scratch_init(rdev);
8311 	if (r)
8312 		return r;
8313 
8314 	cik_mc_program(rdev);
8315 
8316 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8317 		r = ci_mc_load_microcode(rdev);
8318 		if (r) {
8319 			DRM_ERROR("Failed to load MC firmware!\n");
8320 			return r;
8321 		}
8322 	}
8323 
8324 	r = cik_pcie_gart_enable(rdev);
8325 	if (r)
8326 		return r;
8327 	cik_gpu_init(rdev);
8328 
8329 	/* allocate rlc buffers */
8330 	if (rdev->flags & RADEON_IS_IGP) {
8331 		if (rdev->family == CHIP_KAVERI) {
8332 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8333 			rdev->rlc.reg_list_size =
8334 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8335 		} else {
8336 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8337 			rdev->rlc.reg_list_size =
8338 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8339 		}
8340 	}
8341 	rdev->rlc.cs_data = ci_cs_data;
8342 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8343 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8344 	r = sumo_rlc_init(rdev);
8345 	if (r) {
8346 		DRM_ERROR("Failed to init rlc BOs!\n");
8347 		return r;
8348 	}
8349 
8350 	/* allocate wb buffer */
8351 	r = radeon_wb_init(rdev);
8352 	if (r)
8353 		return r;
8354 
8355 	/* allocate mec buffers */
8356 	r = cik_mec_init(rdev);
8357 	if (r) {
8358 		DRM_ERROR("Failed to init MEC BOs!\n");
8359 		return r;
8360 	}
8361 
8362 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8363 	if (r) {
8364 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8365 		return r;
8366 	}
8367 
8368 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8369 	if (r) {
8370 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8371 		return r;
8372 	}
8373 
8374 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8375 	if (r) {
8376 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8377 		return r;
8378 	}
8379 
8380 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8381 	if (r) {
8382 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8383 		return r;
8384 	}
8385 
8386 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8387 	if (r) {
8388 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8389 		return r;
8390 	}
8391 
8392 	cik_uvd_start(rdev);
8393 	cik_vce_start(rdev);
8394 
8395 	/* Enable IRQ */
8396 	if (!rdev->irq.installed) {
8397 		r = radeon_irq_kms_init(rdev);
8398 		if (r)
8399 			return r;
8400 	}
8401 
8402 	r = cik_irq_init(rdev);
8403 	if (r) {
8404 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8405 		radeon_irq_kms_fini(rdev);
8406 		return r;
8407 	}
8408 	cik_irq_set(rdev);
8409 
8410 	if (rdev->family == CHIP_HAWAII) {
8411 		if (rdev->new_fw)
8412 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8413 		else
8414 			nop = RADEON_CP_PACKET2;
8415 	} else {
8416 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8417 	}
8418 
8419 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8420 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8421 			     nop);
8422 	if (r)
8423 		return r;
8424 
8425 	/* set up the compute queues */
8426 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8427 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8428 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8429 			     nop);
8430 	if (r)
8431 		return r;
8432 	ring->me = 1; /* first MEC */
8433 	ring->pipe = 0; /* first pipe */
8434 	ring->queue = 0; /* first queue */
8435 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8436 
8437 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8438 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8439 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8440 			     nop);
8441 	if (r)
8442 		return r;
8443 	/* dGPU only have 1 MEC */
8444 	ring->me = 1; /* first MEC */
8445 	ring->pipe = 0; /* first pipe */
8446 	ring->queue = 1; /* second queue */
8447 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8448 
8449 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8450 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8451 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8452 	if (r)
8453 		return r;
8454 
8455 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8456 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8457 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8458 	if (r)
8459 		return r;
8460 
8461 	r = cik_cp_resume(rdev);
8462 	if (r)
8463 		return r;
8464 
8465 	r = cik_sdma_resume(rdev);
8466 	if (r)
8467 		return r;
8468 
8469 	cik_uvd_resume(rdev);
8470 	cik_vce_resume(rdev);
8471 
8472 	r = radeon_ib_pool_init(rdev);
8473 	if (r) {
8474 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8475 		return r;
8476 	}
8477 
8478 	r = radeon_vm_manager_init(rdev);
8479 	if (r) {
8480 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8481 		return r;
8482 	}
8483 
8484 	r = radeon_audio_init(rdev);
8485 	if (r)
8486 		return r;
8487 
8488 	r = radeon_kfd_resume(rdev);
8489 	if (r)
8490 		return r;
8491 
8492 	return 0;
8493 }
8494 
8495 /**
8496  * cik_resume - resume the asic to a functional state
8497  *
8498  * @rdev: radeon_device pointer
8499  *
8500  * Programs the asic to a functional state (CIK).
8501  * Called at resume.
8502  * Returns 0 for success, error for failure.
8503  */
8504 int cik_resume(struct radeon_device *rdev)
8505 {
8506 	int r;
8507 
8508 	/* post card */
8509 	atom_asic_init(rdev->mode_info.atom_context);
8510 
8511 	/* init golden registers */
8512 	cik_init_golden_registers(rdev);
8513 
8514 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8515 		radeon_pm_resume(rdev);
8516 
8517 	rdev->accel_working = true;
8518 	r = cik_startup(rdev);
8519 	if (r) {
8520 		DRM_ERROR("cik startup failed on resume\n");
8521 		rdev->accel_working = false;
8522 		return r;
8523 	}
8524 
8525 	return r;
8526 
8527 }
8528 
8529 /**
8530  * cik_suspend - suspend the asic
8531  *
8532  * @rdev: radeon_device pointer
8533  *
8534  * Bring the chip into a state suitable for suspend (CIK).
8535  * Called at suspend.
8536  * Returns 0 for success.
8537  */
int cik_suspend(struct radeon_device *rdev)
{
	/* NOTE(review): the call ordering below appears intentional
	 * (clients first, then engines, then IRQ/writeback/GART) —
	 * do not reorder without verifying against the resume path.
	 */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the command processor and DMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* tear down powergating/clockgating before quiescing IRQs */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	/* disable writeback and the GART last */
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8559 
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call the asic-specific functions. This should also allow
 * us to remove a bunch of callback functions like vram_info.
 */
8566 /**
8567  * cik_init - asic specific driver and hw init
8568  *
8569  * @rdev: radeon_device pointer
8570  *
8571  * Setup asic specific driver variables and program the hw
8572  * to a functional state (CIK).
8573  * Called at driver startup.
8574  * Returns 0 for success, errors for failure.
8575  */
8576 int cik_init(struct radeon_device *rdev)
8577 {
8578 	struct radeon_ring *ring;
8579 	int r;
8580 
8581 	/* Read BIOS */
8582 	if (!radeon_get_bios(rdev)) {
8583 		if (ASIC_IS_AVIVO(rdev))
8584 			return -EINVAL;
8585 	}
8586 	/* Must be an ATOMBIOS */
8587 	if (!rdev->is_atom_bios) {
8588 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8589 		return -EINVAL;
8590 	}
8591 	r = radeon_atombios_init(rdev);
8592 	if (r)
8593 		return r;
8594 
8595 	/* Post card if necessary */
8596 	if (!radeon_card_posted(rdev)) {
8597 		if (!rdev->bios) {
8598 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8599 			return -EINVAL;
8600 		}
8601 		DRM_INFO("GPU not posted. posting now...\n");
8602 		atom_asic_init(rdev->mode_info.atom_context);
8603 	}
8604 	/* init golden registers */
8605 	cik_init_golden_registers(rdev);
8606 	/* Initialize scratch registers */
8607 	cik_scratch_init(rdev);
8608 	/* Initialize surface registers */
8609 	radeon_surface_init(rdev);
8610 	/* Initialize clocks */
8611 	radeon_get_clock_info(rdev->ddev);
8612 
8613 	/* Fence driver */
8614 	r = radeon_fence_driver_init(rdev);
8615 	if (r)
8616 		return r;
8617 
8618 	/* initialize memory controller */
8619 	r = cik_mc_init(rdev);
8620 	if (r)
8621 		return r;
8622 	/* Memory manager */
8623 	r = radeon_bo_init(rdev);
8624 	if (r)
8625 		return r;
8626 
8627 	if (rdev->flags & RADEON_IS_IGP) {
8628 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8629 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8630 			r = cik_init_microcode(rdev);
8631 			if (r) {
8632 				DRM_ERROR("Failed to load firmware!\n");
8633 				return r;
8634 			}
8635 		}
8636 	} else {
8637 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8638 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8639 		    !rdev->mc_fw) {
8640 			r = cik_init_microcode(rdev);
8641 			if (r) {
8642 				DRM_ERROR("Failed to load firmware!\n");
8643 				return r;
8644 			}
8645 		}
8646 	}
8647 
8648 	/* Initialize power management */
8649 	radeon_pm_init(rdev);
8650 
8651 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8652 	ring->ring_obj = NULL;
8653 	r600_ring_init(rdev, ring, 1024 * 1024);
8654 
8655 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8656 	ring->ring_obj = NULL;
8657 	r600_ring_init(rdev, ring, 1024 * 1024);
8658 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8659 	if (r)
8660 		return r;
8661 
8662 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8663 	ring->ring_obj = NULL;
8664 	r600_ring_init(rdev, ring, 1024 * 1024);
8665 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8666 	if (r)
8667 		return r;
8668 
8669 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8670 	ring->ring_obj = NULL;
8671 	r600_ring_init(rdev, ring, 256 * 1024);
8672 
8673 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8674 	ring->ring_obj = NULL;
8675 	r600_ring_init(rdev, ring, 256 * 1024);
8676 
8677 	cik_uvd_init(rdev);
8678 	cik_vce_init(rdev);
8679 
8680 	rdev->ih.ring_obj = NULL;
8681 	r600_ih_ring_init(rdev, 64 * 1024);
8682 
8683 	r = r600_pcie_gart_init(rdev);
8684 	if (r)
8685 		return r;
8686 
8687 	rdev->accel_working = true;
8688 	r = cik_startup(rdev);
8689 	if (r) {
8690 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8691 		cik_cp_fini(rdev);
8692 		cik_sdma_fini(rdev);
8693 		cik_irq_fini(rdev);
8694 		sumo_rlc_fini(rdev);
8695 		cik_mec_fini(rdev);
8696 		radeon_wb_fini(rdev);
8697 		radeon_ib_pool_fini(rdev);
8698 		radeon_vm_manager_fini(rdev);
8699 		radeon_irq_kms_fini(rdev);
8700 		cik_pcie_gart_fini(rdev);
8701 		rdev->accel_working = false;
8702 	}
8703 
8704 	/* Don't start up if the MC ucode is missing.
8705 	 * The default clocks and voltages before the MC ucode
8706 	 * is loaded are not suffient for advanced operations.
8707 	 */
8708 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8709 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8710 		return -EINVAL;
8711 	}
8712 
8713 	return 0;
8714 }
8715 
8716 /**
8717  * cik_fini - asic specific driver and hw fini
8718  *
8719  * @rdev: radeon_device pointer
8720  *
8721  * Tear down the asic specific driver variables and program the hw
8722  * to an idle state (CIK).
8723  * Called at driver unload.
8724  */
void cik_fini(struct radeon_device *rdev)
{
	/* NOTE(review): teardown mirrors the init/startup paths above;
	 * keep the ordering in sync if either side changes.
	 */
	radeon_pm_fini(rdev);
	/* stop the engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* media blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	/* memory/gart last */
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8751 
8752 void dce8_program_fmt(struct drm_encoder *encoder)
8753 {
8754 	struct drm_device *dev = encoder->dev;
8755 	struct radeon_device *rdev = dev->dev_private;
8756 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8757 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8758 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8759 	int bpc = 0;
8760 	u32 tmp = 0;
8761 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8762 
8763 	if (connector) {
8764 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8765 		bpc = radeon_get_monitor_bpc(connector);
8766 		dither = radeon_connector->dither;
8767 	}
8768 
8769 	/* LVDS/eDP FMT is set up by atom */
8770 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8771 		return;
8772 
8773 	/* not needed for analog */
8774 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8775 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8776 		return;
8777 
8778 	if (bpc == 0)
8779 		return;
8780 
8781 	switch (bpc) {
8782 	case 6:
8783 		if (dither == RADEON_FMT_DITHER_ENABLE)
8784 			/* XXX sort out optimal dither settings */
8785 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8786 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8787 		else
8788 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8789 		break;
8790 	case 8:
8791 		if (dither == RADEON_FMT_DITHER_ENABLE)
8792 			/* XXX sort out optimal dither settings */
8793 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8794 				FMT_RGB_RANDOM_ENABLE |
8795 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8796 		else
8797 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8798 		break;
8799 	case 10:
8800 		if (dither == RADEON_FMT_DITHER_ENABLE)
8801 			/* XXX sort out optimal dither settings */
8802 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8803 				FMT_RGB_RANDOM_ENABLE |
8804 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8805 		else
8806 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8807 		break;
8808 	default:
8809 		/* not needed */
8810 		break;
8811 	}
8812 
8813 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8814 }
8815 
8816 /* display watermark setup */
8817 /**
8818  * dce8_line_buffer_adjust - Set up the line buffer
8819  *
8820  * @rdev: radeon_device pointer
8821  * @radeon_crtc: the selected display controller
8822  * @mode: the current display mode on the selected display
8823  * controller
8824  *
8825  * Setup up the line buffer allocation for
8826  * the selected display controller (CIK).
8827  * Returns the line buffer size in pixels.
8828  */
8829 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8830 				   struct radeon_crtc *radeon_crtc,
8831 				   struct drm_display_mode *mode)
8832 {
8833 	u32 tmp, buffer_alloc, i;
8834 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8835 	/*
8836 	 * Line Buffer Setup
8837 	 * There are 6 line buffers, one for each display controllers.
8838 	 * There are 3 partitions per LB. Select the number of partitions
8839 	 * to enable based on the display width.  For display widths larger
8840 	 * than 4096, you need use to use 2 display controllers and combine
8841 	 * them using the stereo blender.
8842 	 */
8843 	if (radeon_crtc->base.enabled && mode) {
8844 		if (mode->crtc_hdisplay < 1920) {
8845 			tmp = 1;
8846 			buffer_alloc = 2;
8847 		} else if (mode->crtc_hdisplay < 2560) {
8848 			tmp = 2;
8849 			buffer_alloc = 2;
8850 		} else if (mode->crtc_hdisplay < 4096) {
8851 			tmp = 0;
8852 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8853 		} else {
8854 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8855 			tmp = 0;
8856 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8857 		}
8858 	} else {
8859 		tmp = 1;
8860 		buffer_alloc = 0;
8861 	}
8862 
8863 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8864 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8865 
8866 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8867 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8868 	for (i = 0; i < rdev->usec_timeout; i++) {
8869 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8870 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8871 			break;
8872 		udelay(1);
8873 	}
8874 
8875 	if (radeon_crtc->base.enabled && mode) {
8876 		switch (tmp) {
8877 		case 0:
8878 		default:
8879 			return 4096 * 2;
8880 		case 1:
8881 			return 1920 * 2;
8882 		case 2:
8883 			return 2560 * 2;
8884 		}
8885 	}
8886 
8887 	/* controller not enabled, so no lb used */
8888 	return 0;
8889 }
8890 
8891 /**
8892  * cik_get_number_of_dram_channels - get the number of dram channels
8893  *
8894  * @rdev: radeon_device pointer
8895  *
8896  * Look up the number of video ram channels (CIK).
8897  * Used for display watermark bandwidth calculations
8898  * Returns the number of dram channels
8899  */
8900 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8901 {
8902 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8903 
8904 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8905 	case 0:
8906 	default:
8907 		return 1;
8908 	case 1:
8909 		return 2;
8910 	case 2:
8911 		return 4;
8912 	case 3:
8913 		return 8;
8914 	case 4:
8915 		return 3;
8916 	case 5:
8917 		return 6;
8918 	case 6:
8919 		return 10;
8920 	case 7:
8921 		return 12;
8922 	case 8:
8923 		return 16;
8924 	}
8925 }
8926 
/* Inputs to the DCE8 display watermark calculations below
 * (one instance per clock level per display controller).
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8942 
8943 /**
8944  * dce8_dram_bandwidth - get the dram bandwidth
8945  *
8946  * @wm: watermark calculation data
8947  *
8948  * Calculate the raw dram bandwidth (CIK).
8949  * Used for display watermark bandwidth calculations
8950  * Returns the dram bandwidth in MBytes/s
8951  */
8952 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8953 {
8954 	/* Calculate raw DRAM Bandwidth */
8955 	fixed20_12 dram_efficiency; /* 0.7 */
8956 	fixed20_12 yclk, dram_channels, bandwidth;
8957 	fixed20_12 a;
8958 
8959 	a.full = dfixed_const(1000);
8960 	yclk.full = dfixed_const(wm->yclk);
8961 	yclk.full = dfixed_div(yclk, a);
8962 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8963 	a.full = dfixed_const(10);
8964 	dram_efficiency.full = dfixed_const(7);
8965 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8966 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8967 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8968 
8969 	return dfixed_trunc(bandwidth);
8970 }
8971 
8972 /**
8973  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8974  *
8975  * @wm: watermark calculation data
8976  *
8977  * Calculate the dram bandwidth used for display (CIK).
8978  * Used for display watermark bandwidth calculations
8979  * Returns the dram bandwidth for display in MBytes/s
8980  */
8981 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8982 {
8983 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8984 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8985 	fixed20_12 yclk, dram_channels, bandwidth;
8986 	fixed20_12 a;
8987 
8988 	a.full = dfixed_const(1000);
8989 	yclk.full = dfixed_const(wm->yclk);
8990 	yclk.full = dfixed_div(yclk, a);
8991 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8992 	a.full = dfixed_const(10);
8993 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8994 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8995 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8996 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8997 
8998 	return dfixed_trunc(bandwidth);
8999 }
9000 
9001 /**
9002  * dce8_data_return_bandwidth - get the data return bandwidth
9003  *
9004  * @wm: watermark calculation data
9005  *
9006  * Calculate the data return bandwidth used for display (CIK).
9007  * Used for display watermark bandwidth calculations
9008  * Returns the data return bandwidth in MBytes/s
9009  */
9010 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9011 {
9012 	/* Calculate the display Data return Bandwidth */
9013 	fixed20_12 return_efficiency; /* 0.8 */
9014 	fixed20_12 sclk, bandwidth;
9015 	fixed20_12 a;
9016 
9017 	a.full = dfixed_const(1000);
9018 	sclk.full = dfixed_const(wm->sclk);
9019 	sclk.full = dfixed_div(sclk, a);
9020 	a.full = dfixed_const(10);
9021 	return_efficiency.full = dfixed_const(8);
9022 	return_efficiency.full = dfixed_div(return_efficiency, a);
9023 	a.full = dfixed_const(32);
9024 	bandwidth.full = dfixed_mul(a, sclk);
9025 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9026 
9027 	return dfixed_trunc(bandwidth);
9028 }
9029 
9030 /**
9031  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9032  *
9033  * @wm: watermark calculation data
9034  *
9035  * Calculate the dmif bandwidth used for display (CIK).
9036  * Used for display watermark bandwidth calculations
9037  * Returns the dmif bandwidth in MBytes/s
9038  */
9039 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9040 {
9041 	/* Calculate the DMIF Request Bandwidth */
9042 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9043 	fixed20_12 disp_clk, bandwidth;
9044 	fixed20_12 a, b;
9045 
9046 	a.full = dfixed_const(1000);
9047 	disp_clk.full = dfixed_const(wm->disp_clk);
9048 	disp_clk.full = dfixed_div(disp_clk, a);
9049 	a.full = dfixed_const(32);
9050 	b.full = dfixed_mul(a, disp_clk);
9051 
9052 	a.full = dfixed_const(10);
9053 	disp_clk_request_efficiency.full = dfixed_const(8);
9054 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9055 
9056 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9057 
9058 	return dfixed_trunc(bandwidth);
9059 }
9060 
9061 /**
9062  * dce8_available_bandwidth - get the min available bandwidth
9063  *
9064  * @wm: watermark calculation data
9065  *
9066  * Calculate the min available bandwidth used for display (CIK).
9067  * Used for display watermark bandwidth calculations
9068  * Returns the min available bandwidth in MBytes/s
9069  */
9070 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9071 {
9072 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9073 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9074 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9075 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9076 
9077 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9078 }
9079 
9080 /**
9081  * dce8_average_bandwidth - get the average available bandwidth
9082  *
9083  * @wm: watermark calculation data
9084  *
9085  * Calculate the average available bandwidth used for display (CIK).
9086  * Used for display watermark bandwidth calculations
9087  * Returns the average available bandwidth in MBytes/s
9088  */
9089 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9090 {
9091 	/* Calculate the display mode Average Bandwidth
9092 	 * DisplayMode should contain the source and destination dimensions,
9093 	 * timing, etc.
9094 	 */
9095 	fixed20_12 bpp;
9096 	fixed20_12 line_time;
9097 	fixed20_12 src_width;
9098 	fixed20_12 bandwidth;
9099 	fixed20_12 a;
9100 
9101 	a.full = dfixed_const(1000);
9102 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9103 	line_time.full = dfixed_div(line_time, a);
9104 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9105 	src_width.full = dfixed_const(wm->src_width);
9106 	bandwidth.full = dfixed_mul(src_width, bpp);
9107 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9108 	bandwidth.full = dfixed_div(bandwidth, line_time);
9109 
9110 	return dfixed_trunc(bandwidth);
9111 }
9112 
9113 /**
9114  * dce8_latency_watermark - get the latency watermark
9115  *
9116  * @wm: watermark calculation data
9117  *
9118  * Calculate the latency watermark (CIK).
9119  * Used for display watermark bandwidth calculations
9120  * Returns the latency watermark in ns
9121  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* NOTE(review): the two divisions below run before the num_heads
	 * guard; presumably available_bandwidth is never 0 here — verify
	 * against the callers.
	 */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* determine how many source lines feed one destination line,
	 * based on scale ratio, tap count and interlacing
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* line buffer fill bandwidth: bounded by this head's share of the
	 * available bandwidth, the dmif size limit, and the display rate
	 */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	/* time to fill one line's worth of the line buffer */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line fills slower than it scans out, pad the latency */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9171 
9172 /**
9173  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9174  * average and available dram bandwidth
9175  *
9176  * @wm: watermark calculation data
9177  *
9178  * Check if the display average bandwidth fits in the display
9179  * dram bandwidth (CIK).
9180  * Used for display watermark bandwidth calculations
9181  * Returns true if the display fits, false if not.
9182  */
9183 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9184 {
9185 	if (dce8_average_bandwidth(wm) <=
9186 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9187 		return true;
9188 	else
9189 		return false;
9190 }
9191 
9192 /**
9193  * dce8_average_bandwidth_vs_available_bandwidth - check
9194  * average and available bandwidth
9195  *
9196  * @wm: watermark calculation data
9197  *
9198  * Check if the display average bandwidth fits in the display
9199  * available bandwidth (CIK).
9200  * Used for display watermark bandwidth calculations
9201  * Returns true if the display fits, false if not.
9202  */
9203 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9204 {
9205 	if (dce8_average_bandwidth(wm) <=
9206 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9207 		return true;
9208 	else
9209 		return false;
9210 }
9211 
9212 /**
9213  * dce8_check_latency_hiding - check latency hiding
9214  *
9215  * @wm: watermark calculation data
9216  *
9217  * Check latency hiding (CIK).
9218  * Used for display watermark bandwidth calculations
9219  * Returns true if the display fits, false if not.
9220  */
9221 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9222 {
9223 	u32 lb_partitions = wm->lb_size / wm->src_width;
9224 	u32 line_time = wm->active_time + wm->blank_time;
9225 	u32 latency_tolerant_lines;
9226 	u32 latency_hiding;
9227 	fixed20_12 a;
9228 
9229 	a.full = dfixed_const(1);
9230 	if (wm->vsc.full > a.full)
9231 		latency_tolerant_lines = 1;
9232 	else {
9233 		if (lb_partitions <= (wm->vtaps + 1))
9234 			latency_tolerant_lines = 1;
9235 		else
9236 			latency_tolerant_lines = 2;
9237 	}
9238 
9239 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9240 
9241 	if (dce8_latency_watermark(wm) <= latency_hiding)
9242 		return true;
9243 	else
9244 		return false;
9245 }
9246 
9247 /**
9248  * dce8_program_watermarks - program display watermarks
9249  *
9250  * @rdev: radeon_device pointer
9251  * @radeon_crtc: the selected display controller
9252  * @lb_size: line buffer size
9253  * @num_heads: number of display controllers in use
9254  *
9255  * Calculate and program the display watermarks for the
9256  * selected display controller (CIK).
9257  */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 active_time;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* active and total line times in ns */
		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
					    (u32)mode->clock);
		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
					  (u32)mode->clock);
		/* clamp to the 16-bit register field */
		line_time = min(line_time, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = active_time;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		/* NOTE(review): this mirrors the high-clock block above with
		 * the "low" dpm clock levels; keep the two in sync.
		 */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = active_time;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9385 
9386 /**
9387  * dce8_bandwidth_update - program display watermarks
9388  *
9389  * @rdev: radeon_device pointer
9390  *
9391  * Calculate and program the display watermarks and line
9392  * buffer allocation (CIK).
9393  */
9394 void dce8_bandwidth_update(struct radeon_device *rdev)
9395 {
9396 	struct drm_display_mode *mode = NULL;
9397 	u32 num_heads = 0, lb_size;
9398 	int i;
9399 
9400 	if (!rdev->mode_info.mode_config_initialized)
9401 		return;
9402 
9403 	radeon_update_display_priority(rdev);
9404 
9405 	for (i = 0; i < rdev->num_crtc; i++) {
9406 		if (rdev->mode_info.crtcs[i]->base.enabled)
9407 			num_heads++;
9408 	}
9409 	for (i = 0; i < rdev->num_crtc; i++) {
9410 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9411 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9412 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9413 	}
9414 }
9415 
9416 /**
9417  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9418  *
9419  * @rdev: radeon_device pointer
9420  *
9421  * Fetches a GPU clock counter snapshot (SI).
9422  * Returns the 64 bit clock counter snapshot.
9423  */
9424 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9425 {
9426 	uint64_t clock;
9427 
9428 	mutex_lock(&rdev->gpu_clock_mutex);
9429 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9430 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9431 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9432 	mutex_unlock(&rdev->gpu_clock_mutex);
9433 	return clock;
9434 }
9435 
9436 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9437 			     u32 cntl_reg, u32 status_reg)
9438 {
9439 	int r, i;
9440 	struct atom_clock_dividers dividers;
9441 	uint32_t tmp;
9442 
9443 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9444 					   clock, false, &dividers);
9445 	if (r)
9446 		return r;
9447 
9448 	tmp = RREG32_SMC(cntl_reg);
9449 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9450 	tmp |= dividers.post_divider;
9451 	WREG32_SMC(cntl_reg, tmp);
9452 
9453 	for (i = 0; i < 100; i++) {
9454 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9455 			break;
9456 		mdelay(10);
9457 	}
9458 	if (i == 100)
9459 		return -ETIMEDOUT;
9460 
9461 	return 0;
9462 }
9463 
9464 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9465 {
9466 	int r = 0;
9467 
9468 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9469 	if (r)
9470 		return r;
9471 
9472 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9473 	return r;
9474 }
9475 
9476 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9477 {
9478 	int r, i;
9479 	struct atom_clock_dividers dividers;
9480 	u32 tmp;
9481 
9482 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9483 					   ecclk, false, &dividers);
9484 	if (r)
9485 		return r;
9486 
9487 	for (i = 0; i < 100; i++) {
9488 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9489 			break;
9490 		mdelay(10);
9491 	}
9492 	if (i == 100)
9493 		return -ETIMEDOUT;
9494 
9495 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9496 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9497 	tmp |= dividers.post_divider;
9498 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9499 
9500 	for (i = 0; i < 100; i++) {
9501 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9502 			break;
9503 		mdelay(10);
9504 	}
9505 	if (i == 100)
9506 		return -ETIMEDOUT;
9507 
9508 	return 0;
9509 }
9510 
/*
 * cik_pcie_gen3_enable - retrain the PCIE link at the highest supported speed
 *
 * @rdev: radeon_device pointer
 *
 * Raises the PCIE link to gen2 or gen3 depending on what both the GPU
 * and the upstream bridge advertise, including the gen3 equalization
 * retry sequence.  No-op for root-bus devices, IGPs, non-PCIE parts,
 * or when disabled via the radeon.pcie_gen2=0 module parameter.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* No upstream bridge to negotiate with. */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* Speed changes disabled by module parameter. */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* What speeds do the platform and GPU both support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* Nothing beyond gen1 available; leave the link as-is. */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* Current link rate: 0 = gen1, 1 = gen2, 2 = gen3. */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* Locate the PCIe capability blocks for the bridge and the GPU. */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* Save the current link-control settings on both ends. */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* Enable hardware autonomous width on both ends. */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* Widen the link back to the detected maximum if it narrowed. */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* Up to 10 equalization retries. */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* Snapshot LNKCTL/LNKCTL2 so we can restore them below. */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* Quiesce the link, then request equalization redo. */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* NOTE(review): (1 << 4) | (7 << 9) preserves compliance-related
				 * LNKCTL2 bits (bit 4, bits 9-11); verify against the PCIe spec
				 * field names rather than relying on the raw masks. */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* Release the quiesce for this retry. */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* Program the GPU's target link speed (LNKCTL2 bits 3:0). */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* Kick off the speed change ... */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* ... and wait for the hardware to clear the initiate bit. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9670 
/*
 * cik_program_aspm - program PCIE Active State Power Management
 *
 * @rdev: radeon_device pointer
 *
 * Configures L0s/L1 inactivity timers, PLL power-down in L1, dynamic
 * lane power states, and (when the upstream bridge supports CLKPM)
 * the clock-request related clock selects.  No-op for IGPs, non-PCIE
 * parts, or when disabled via the radeon.aspm module parameter.
 * All register writes are skipped when the value is unchanged.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* Local policy knobs; all features are currently left enabled. */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* ASPM disabled by module parameter. */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* Override the transmitted N_FTS value. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* Build the L0s/L1 inactivity configuration. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set its inactivity timer and allow PMI-to-L1. */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* Allow the PHY PLLs to power down in OFF/TXS2 states
			 * on both PIF blocks (PB0/PB1, lanes 0 and 1). */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ is only usable if the upstream bridge
			 * advertises clock power management. */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* Switch the thermal/misc clocks to alternate
				 * sources so the reference clock can gate. */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: write just the L0s/PMI configuration. */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* Memory light sleep for the BIF. */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* If N_FTS saturated and the link is reversed in both
		 * directions, L0s cannot be used - clear its timer. */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9819