xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision fb960bd2)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 
/*
 * Default SH_MEM_CONFIG value: the alignment-mode field is set to
 * SH_MEM_ALIGNMENT_MODE_UNALIGNED.
 * NOTE(review): "GFX" presumably means this default is for the graphics
 * VMIDs - confirm against the users of this macro.
 */
#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
39 
/*
 * Firmware image names announced by this module via MODULE_FIRMWARE().
 * Each chip has two sets: one with an upper-case chip prefix and one with a
 * lower-case chip prefix.
 * NOTE(review): presumably the upper-case names are the older image format
 * and the lower-case names the newer one - confirm against the loader.
 */

/* BONAIRE, upper-case names (includes mc and mc2 images) */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

/* bonaire, lower-case names (no mc2 image, adds a k_smc image) */
MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

/* HAWAII, upper-case names (includes mc and mc2 images) */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

/* hawaii, lower-case names (no mc2 image, adds a k_smc image) */
MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

/* KAVERI, upper-case names (no mc/smc images are listed) */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

/* kaveri, lower-case names (adds a mec2 image) */
MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* KABINI, upper-case names */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

/* kabini, lower-case names */
MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* MULLINS, upper-case names */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

/* mullins, lower-case names */
MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
122 
123 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
124 extern void r600_ih_ring_fini(struct radeon_device *rdev);
125 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
126 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
127 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
128 extern void sumo_rlc_fini(struct radeon_device *rdev);
129 extern int sumo_rlc_init(struct radeon_device *rdev);
130 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
131 extern void si_rlc_reset(struct radeon_device *rdev);
132 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
133 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
134 extern int cik_sdma_resume(struct radeon_device *rdev);
135 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
136 extern void cik_sdma_fini(struct radeon_device *rdev);
137 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
138 static void cik_rlc_stop(struct radeon_device *rdev);
139 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
140 static void cik_program_aspm(struct radeon_device *rdev);
141 static void cik_init_pg(struct radeon_device *rdev);
142 static void cik_init_cg(struct radeon_device *rdev);
143 static void cik_fini_pg(struct radeon_device *rdev);
144 static void cik_fini_cg(struct radeon_device *rdev);
145 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
146 					  bool enable);
147 
148 /**
149  * cik_get_allowed_info_register - fetch the register for the info ioctl
150  *
151  * @rdev: radeon_device pointer
152  * @reg: register offset in bytes
153  * @val: register value
154  *
155  * Returns 0 for success or -EINVAL for an invalid register
156  *
157  */
158 int cik_get_allowed_info_register(struct radeon_device *rdev,
159 				  u32 reg, u32 *val)
160 {
161 	switch (reg) {
162 	case GRBM_STATUS:
163 	case GRBM_STATUS2:
164 	case GRBM_STATUS_SE0:
165 	case GRBM_STATUS_SE1:
166 	case GRBM_STATUS_SE2:
167 	case GRBM_STATUS_SE3:
168 	case SRBM_STATUS:
169 	case SRBM_STATUS2:
170 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
171 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
172 	case UVD_STATUS:
173 	/* TODO VCE */
174 		*val = RREG32(reg);
175 		return 0;
176 	default:
177 		return -EINVAL;
178 	}
179 }
180 
181 /*
182  * Indirect registers accessor
183  */
184 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
185 {
186 	unsigned long flags;
187 	u32 r;
188 
189 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
190 	WREG32(CIK_DIDT_IND_INDEX, (reg));
191 	r = RREG32(CIK_DIDT_IND_DATA);
192 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
193 	return r;
194 }
195 
196 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
197 {
198 	unsigned long flags;
199 
200 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
201 	WREG32(CIK_DIDT_IND_INDEX, (reg));
202 	WREG32(CIK_DIDT_IND_DATA, (v));
203 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
204 }
205 
206 /* get temperature in millidegrees */
207 int ci_get_temp(struct radeon_device *rdev)
208 {
209 	u32 temp;
210 	int actual_temp = 0;
211 
212 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
213 		CTF_TEMP_SHIFT;
214 
215 	if (temp & 0x200)
216 		actual_temp = 255;
217 	else
218 		actual_temp = temp & 0x1ff;
219 
220 	actual_temp = actual_temp * 1000;
221 
222 	return actual_temp;
223 }
224 
225 /* get temperature in millidegrees */
226 int kv_get_temp(struct radeon_device *rdev)
227 {
228 	u32 temp;
229 	int actual_temp = 0;
230 
231 	temp = RREG32_SMC(0xC0300E0C);
232 
233 	if (temp)
234 		actual_temp = (temp / 8) - 49;
235 	else
236 		actual_temp = 0;
237 
238 	actual_temp = actual_temp * 1000;
239 
240 	return actual_temp;
241 }
242 
243 /*
244  * Indirect registers accessor
245  */
246 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
247 {
248 	unsigned long flags;
249 	u32 r;
250 
251 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
252 	WREG32(PCIE_INDEX, reg);
253 	(void)RREG32(PCIE_INDEX);
254 	r = RREG32(PCIE_DATA);
255 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
256 	return r;
257 }
258 
259 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
260 {
261 	unsigned long flags;
262 
263 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
264 	WREG32(PCIE_INDEX, reg);
265 	(void)RREG32(PCIE_INDEX);
266 	WREG32(PCIE_DATA, v);
267 	(void)RREG32(PCIE_DATA);
268 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
269 }
270 
271 static const u32 spectre_rlc_save_restore_register_list[] =
272 {
273 	(0x0e00 << 16) | (0xc12c >> 2),
274 	0x00000000,
275 	(0x0e00 << 16) | (0xc140 >> 2),
276 	0x00000000,
277 	(0x0e00 << 16) | (0xc150 >> 2),
278 	0x00000000,
279 	(0x0e00 << 16) | (0xc15c >> 2),
280 	0x00000000,
281 	(0x0e00 << 16) | (0xc168 >> 2),
282 	0x00000000,
283 	(0x0e00 << 16) | (0xc170 >> 2),
284 	0x00000000,
285 	(0x0e00 << 16) | (0xc178 >> 2),
286 	0x00000000,
287 	(0x0e00 << 16) | (0xc204 >> 2),
288 	0x00000000,
289 	(0x0e00 << 16) | (0xc2b4 >> 2),
290 	0x00000000,
291 	(0x0e00 << 16) | (0xc2b8 >> 2),
292 	0x00000000,
293 	(0x0e00 << 16) | (0xc2bc >> 2),
294 	0x00000000,
295 	(0x0e00 << 16) | (0xc2c0 >> 2),
296 	0x00000000,
297 	(0x0e00 << 16) | (0x8228 >> 2),
298 	0x00000000,
299 	(0x0e00 << 16) | (0x829c >> 2),
300 	0x00000000,
301 	(0x0e00 << 16) | (0x869c >> 2),
302 	0x00000000,
303 	(0x0600 << 16) | (0x98f4 >> 2),
304 	0x00000000,
305 	(0x0e00 << 16) | (0x98f8 >> 2),
306 	0x00000000,
307 	(0x0e00 << 16) | (0x9900 >> 2),
308 	0x00000000,
309 	(0x0e00 << 16) | (0xc260 >> 2),
310 	0x00000000,
311 	(0x0e00 << 16) | (0x90e8 >> 2),
312 	0x00000000,
313 	(0x0e00 << 16) | (0x3c000 >> 2),
314 	0x00000000,
315 	(0x0e00 << 16) | (0x3c00c >> 2),
316 	0x00000000,
317 	(0x0e00 << 16) | (0x8c1c >> 2),
318 	0x00000000,
319 	(0x0e00 << 16) | (0x9700 >> 2),
320 	0x00000000,
321 	(0x0e00 << 16) | (0xcd20 >> 2),
322 	0x00000000,
323 	(0x4e00 << 16) | (0xcd20 >> 2),
324 	0x00000000,
325 	(0x5e00 << 16) | (0xcd20 >> 2),
326 	0x00000000,
327 	(0x6e00 << 16) | (0xcd20 >> 2),
328 	0x00000000,
329 	(0x7e00 << 16) | (0xcd20 >> 2),
330 	0x00000000,
331 	(0x8e00 << 16) | (0xcd20 >> 2),
332 	0x00000000,
333 	(0x9e00 << 16) | (0xcd20 >> 2),
334 	0x00000000,
335 	(0xae00 << 16) | (0xcd20 >> 2),
336 	0x00000000,
337 	(0xbe00 << 16) | (0xcd20 >> 2),
338 	0x00000000,
339 	(0x0e00 << 16) | (0x89bc >> 2),
340 	0x00000000,
341 	(0x0e00 << 16) | (0x8900 >> 2),
342 	0x00000000,
343 	0x3,
344 	(0x0e00 << 16) | (0xc130 >> 2),
345 	0x00000000,
346 	(0x0e00 << 16) | (0xc134 >> 2),
347 	0x00000000,
348 	(0x0e00 << 16) | (0xc1fc >> 2),
349 	0x00000000,
350 	(0x0e00 << 16) | (0xc208 >> 2),
351 	0x00000000,
352 	(0x0e00 << 16) | (0xc264 >> 2),
353 	0x00000000,
354 	(0x0e00 << 16) | (0xc268 >> 2),
355 	0x00000000,
356 	(0x0e00 << 16) | (0xc26c >> 2),
357 	0x00000000,
358 	(0x0e00 << 16) | (0xc270 >> 2),
359 	0x00000000,
360 	(0x0e00 << 16) | (0xc274 >> 2),
361 	0x00000000,
362 	(0x0e00 << 16) | (0xc278 >> 2),
363 	0x00000000,
364 	(0x0e00 << 16) | (0xc27c >> 2),
365 	0x00000000,
366 	(0x0e00 << 16) | (0xc280 >> 2),
367 	0x00000000,
368 	(0x0e00 << 16) | (0xc284 >> 2),
369 	0x00000000,
370 	(0x0e00 << 16) | (0xc288 >> 2),
371 	0x00000000,
372 	(0x0e00 << 16) | (0xc28c >> 2),
373 	0x00000000,
374 	(0x0e00 << 16) | (0xc290 >> 2),
375 	0x00000000,
376 	(0x0e00 << 16) | (0xc294 >> 2),
377 	0x00000000,
378 	(0x0e00 << 16) | (0xc298 >> 2),
379 	0x00000000,
380 	(0x0e00 << 16) | (0xc29c >> 2),
381 	0x00000000,
382 	(0x0e00 << 16) | (0xc2a0 >> 2),
383 	0x00000000,
384 	(0x0e00 << 16) | (0xc2a4 >> 2),
385 	0x00000000,
386 	(0x0e00 << 16) | (0xc2a8 >> 2),
387 	0x00000000,
388 	(0x0e00 << 16) | (0xc2ac  >> 2),
389 	0x00000000,
390 	(0x0e00 << 16) | (0xc2b0 >> 2),
391 	0x00000000,
392 	(0x0e00 << 16) | (0x301d0 >> 2),
393 	0x00000000,
394 	(0x0e00 << 16) | (0x30238 >> 2),
395 	0x00000000,
396 	(0x0e00 << 16) | (0x30250 >> 2),
397 	0x00000000,
398 	(0x0e00 << 16) | (0x30254 >> 2),
399 	0x00000000,
400 	(0x0e00 << 16) | (0x30258 >> 2),
401 	0x00000000,
402 	(0x0e00 << 16) | (0x3025c >> 2),
403 	0x00000000,
404 	(0x4e00 << 16) | (0xc900 >> 2),
405 	0x00000000,
406 	(0x5e00 << 16) | (0xc900 >> 2),
407 	0x00000000,
408 	(0x6e00 << 16) | (0xc900 >> 2),
409 	0x00000000,
410 	(0x7e00 << 16) | (0xc900 >> 2),
411 	0x00000000,
412 	(0x8e00 << 16) | (0xc900 >> 2),
413 	0x00000000,
414 	(0x9e00 << 16) | (0xc900 >> 2),
415 	0x00000000,
416 	(0xae00 << 16) | (0xc900 >> 2),
417 	0x00000000,
418 	(0xbe00 << 16) | (0xc900 >> 2),
419 	0x00000000,
420 	(0x4e00 << 16) | (0xc904 >> 2),
421 	0x00000000,
422 	(0x5e00 << 16) | (0xc904 >> 2),
423 	0x00000000,
424 	(0x6e00 << 16) | (0xc904 >> 2),
425 	0x00000000,
426 	(0x7e00 << 16) | (0xc904 >> 2),
427 	0x00000000,
428 	(0x8e00 << 16) | (0xc904 >> 2),
429 	0x00000000,
430 	(0x9e00 << 16) | (0xc904 >> 2),
431 	0x00000000,
432 	(0xae00 << 16) | (0xc904 >> 2),
433 	0x00000000,
434 	(0xbe00 << 16) | (0xc904 >> 2),
435 	0x00000000,
436 	(0x4e00 << 16) | (0xc908 >> 2),
437 	0x00000000,
438 	(0x5e00 << 16) | (0xc908 >> 2),
439 	0x00000000,
440 	(0x6e00 << 16) | (0xc908 >> 2),
441 	0x00000000,
442 	(0x7e00 << 16) | (0xc908 >> 2),
443 	0x00000000,
444 	(0x8e00 << 16) | (0xc908 >> 2),
445 	0x00000000,
446 	(0x9e00 << 16) | (0xc908 >> 2),
447 	0x00000000,
448 	(0xae00 << 16) | (0xc908 >> 2),
449 	0x00000000,
450 	(0xbe00 << 16) | (0xc908 >> 2),
451 	0x00000000,
452 	(0x4e00 << 16) | (0xc90c >> 2),
453 	0x00000000,
454 	(0x5e00 << 16) | (0xc90c >> 2),
455 	0x00000000,
456 	(0x6e00 << 16) | (0xc90c >> 2),
457 	0x00000000,
458 	(0x7e00 << 16) | (0xc90c >> 2),
459 	0x00000000,
460 	(0x8e00 << 16) | (0xc90c >> 2),
461 	0x00000000,
462 	(0x9e00 << 16) | (0xc90c >> 2),
463 	0x00000000,
464 	(0xae00 << 16) | (0xc90c >> 2),
465 	0x00000000,
466 	(0xbe00 << 16) | (0xc90c >> 2),
467 	0x00000000,
468 	(0x4e00 << 16) | (0xc910 >> 2),
469 	0x00000000,
470 	(0x5e00 << 16) | (0xc910 >> 2),
471 	0x00000000,
472 	(0x6e00 << 16) | (0xc910 >> 2),
473 	0x00000000,
474 	(0x7e00 << 16) | (0xc910 >> 2),
475 	0x00000000,
476 	(0x8e00 << 16) | (0xc910 >> 2),
477 	0x00000000,
478 	(0x9e00 << 16) | (0xc910 >> 2),
479 	0x00000000,
480 	(0xae00 << 16) | (0xc910 >> 2),
481 	0x00000000,
482 	(0xbe00 << 16) | (0xc910 >> 2),
483 	0x00000000,
484 	(0x0e00 << 16) | (0xc99c >> 2),
485 	0x00000000,
486 	(0x0e00 << 16) | (0x9834 >> 2),
487 	0x00000000,
488 	(0x0000 << 16) | (0x30f00 >> 2),
489 	0x00000000,
490 	(0x0001 << 16) | (0x30f00 >> 2),
491 	0x00000000,
492 	(0x0000 << 16) | (0x30f04 >> 2),
493 	0x00000000,
494 	(0x0001 << 16) | (0x30f04 >> 2),
495 	0x00000000,
496 	(0x0000 << 16) | (0x30f08 >> 2),
497 	0x00000000,
498 	(0x0001 << 16) | (0x30f08 >> 2),
499 	0x00000000,
500 	(0x0000 << 16) | (0x30f0c >> 2),
501 	0x00000000,
502 	(0x0001 << 16) | (0x30f0c >> 2),
503 	0x00000000,
504 	(0x0600 << 16) | (0x9b7c >> 2),
505 	0x00000000,
506 	(0x0e00 << 16) | (0x8a14 >> 2),
507 	0x00000000,
508 	(0x0e00 << 16) | (0x8a18 >> 2),
509 	0x00000000,
510 	(0x0600 << 16) | (0x30a00 >> 2),
511 	0x00000000,
512 	(0x0e00 << 16) | (0x8bf0 >> 2),
513 	0x00000000,
514 	(0x0e00 << 16) | (0x8bcc >> 2),
515 	0x00000000,
516 	(0x0e00 << 16) | (0x8b24 >> 2),
517 	0x00000000,
518 	(0x0e00 << 16) | (0x30a04 >> 2),
519 	0x00000000,
520 	(0x0600 << 16) | (0x30a10 >> 2),
521 	0x00000000,
522 	(0x0600 << 16) | (0x30a14 >> 2),
523 	0x00000000,
524 	(0x0600 << 16) | (0x30a18 >> 2),
525 	0x00000000,
526 	(0x0600 << 16) | (0x30a2c >> 2),
527 	0x00000000,
528 	(0x0e00 << 16) | (0xc700 >> 2),
529 	0x00000000,
530 	(0x0e00 << 16) | (0xc704 >> 2),
531 	0x00000000,
532 	(0x0e00 << 16) | (0xc708 >> 2),
533 	0x00000000,
534 	(0x0e00 << 16) | (0xc768 >> 2),
535 	0x00000000,
536 	(0x0400 << 16) | (0xc770 >> 2),
537 	0x00000000,
538 	(0x0400 << 16) | (0xc774 >> 2),
539 	0x00000000,
540 	(0x0400 << 16) | (0xc778 >> 2),
541 	0x00000000,
542 	(0x0400 << 16) | (0xc77c >> 2),
543 	0x00000000,
544 	(0x0400 << 16) | (0xc780 >> 2),
545 	0x00000000,
546 	(0x0400 << 16) | (0xc784 >> 2),
547 	0x00000000,
548 	(0x0400 << 16) | (0xc788 >> 2),
549 	0x00000000,
550 	(0x0400 << 16) | (0xc78c >> 2),
551 	0x00000000,
552 	(0x0400 << 16) | (0xc798 >> 2),
553 	0x00000000,
554 	(0x0400 << 16) | (0xc79c >> 2),
555 	0x00000000,
556 	(0x0400 << 16) | (0xc7a0 >> 2),
557 	0x00000000,
558 	(0x0400 << 16) | (0xc7a4 >> 2),
559 	0x00000000,
560 	(0x0400 << 16) | (0xc7a8 >> 2),
561 	0x00000000,
562 	(0x0400 << 16) | (0xc7ac >> 2),
563 	0x00000000,
564 	(0x0400 << 16) | (0xc7b0 >> 2),
565 	0x00000000,
566 	(0x0400 << 16) | (0xc7b4 >> 2),
567 	0x00000000,
568 	(0x0e00 << 16) | (0x9100 >> 2),
569 	0x00000000,
570 	(0x0e00 << 16) | (0x3c010 >> 2),
571 	0x00000000,
572 	(0x0e00 << 16) | (0x92a8 >> 2),
573 	0x00000000,
574 	(0x0e00 << 16) | (0x92ac >> 2),
575 	0x00000000,
576 	(0x0e00 << 16) | (0x92b4 >> 2),
577 	0x00000000,
578 	(0x0e00 << 16) | (0x92b8 >> 2),
579 	0x00000000,
580 	(0x0e00 << 16) | (0x92bc >> 2),
581 	0x00000000,
582 	(0x0e00 << 16) | (0x92c0 >> 2),
583 	0x00000000,
584 	(0x0e00 << 16) | (0x92c4 >> 2),
585 	0x00000000,
586 	(0x0e00 << 16) | (0x92c8 >> 2),
587 	0x00000000,
588 	(0x0e00 << 16) | (0x92cc >> 2),
589 	0x00000000,
590 	(0x0e00 << 16) | (0x92d0 >> 2),
591 	0x00000000,
592 	(0x0e00 << 16) | (0x8c00 >> 2),
593 	0x00000000,
594 	(0x0e00 << 16) | (0x8c04 >> 2),
595 	0x00000000,
596 	(0x0e00 << 16) | (0x8c20 >> 2),
597 	0x00000000,
598 	(0x0e00 << 16) | (0x8c38 >> 2),
599 	0x00000000,
600 	(0x0e00 << 16) | (0x8c3c >> 2),
601 	0x00000000,
602 	(0x0e00 << 16) | (0xae00 >> 2),
603 	0x00000000,
604 	(0x0e00 << 16) | (0x9604 >> 2),
605 	0x00000000,
606 	(0x0e00 << 16) | (0xac08 >> 2),
607 	0x00000000,
608 	(0x0e00 << 16) | (0xac0c >> 2),
609 	0x00000000,
610 	(0x0e00 << 16) | (0xac10 >> 2),
611 	0x00000000,
612 	(0x0e00 << 16) | (0xac14 >> 2),
613 	0x00000000,
614 	(0x0e00 << 16) | (0xac58 >> 2),
615 	0x00000000,
616 	(0x0e00 << 16) | (0xac68 >> 2),
617 	0x00000000,
618 	(0x0e00 << 16) | (0xac6c >> 2),
619 	0x00000000,
620 	(0x0e00 << 16) | (0xac70 >> 2),
621 	0x00000000,
622 	(0x0e00 << 16) | (0xac74 >> 2),
623 	0x00000000,
624 	(0x0e00 << 16) | (0xac78 >> 2),
625 	0x00000000,
626 	(0x0e00 << 16) | (0xac7c >> 2),
627 	0x00000000,
628 	(0x0e00 << 16) | (0xac80 >> 2),
629 	0x00000000,
630 	(0x0e00 << 16) | (0xac84 >> 2),
631 	0x00000000,
632 	(0x0e00 << 16) | (0xac88 >> 2),
633 	0x00000000,
634 	(0x0e00 << 16) | (0xac8c >> 2),
635 	0x00000000,
636 	(0x0e00 << 16) | (0x970c >> 2),
637 	0x00000000,
638 	(0x0e00 << 16) | (0x9714 >> 2),
639 	0x00000000,
640 	(0x0e00 << 16) | (0x9718 >> 2),
641 	0x00000000,
642 	(0x0e00 << 16) | (0x971c >> 2),
643 	0x00000000,
644 	(0x0e00 << 16) | (0x31068 >> 2),
645 	0x00000000,
646 	(0x4e00 << 16) | (0x31068 >> 2),
647 	0x00000000,
648 	(0x5e00 << 16) | (0x31068 >> 2),
649 	0x00000000,
650 	(0x6e00 << 16) | (0x31068 >> 2),
651 	0x00000000,
652 	(0x7e00 << 16) | (0x31068 >> 2),
653 	0x00000000,
654 	(0x8e00 << 16) | (0x31068 >> 2),
655 	0x00000000,
656 	(0x9e00 << 16) | (0x31068 >> 2),
657 	0x00000000,
658 	(0xae00 << 16) | (0x31068 >> 2),
659 	0x00000000,
660 	(0xbe00 << 16) | (0x31068 >> 2),
661 	0x00000000,
662 	(0x0e00 << 16) | (0xcd10 >> 2),
663 	0x00000000,
664 	(0x0e00 << 16) | (0xcd14 >> 2),
665 	0x00000000,
666 	(0x0e00 << 16) | (0x88b0 >> 2),
667 	0x00000000,
668 	(0x0e00 << 16) | (0x88b4 >> 2),
669 	0x00000000,
670 	(0x0e00 << 16) | (0x88b8 >> 2),
671 	0x00000000,
672 	(0x0e00 << 16) | (0x88bc >> 2),
673 	0x00000000,
674 	(0x0400 << 16) | (0x89c0 >> 2),
675 	0x00000000,
676 	(0x0e00 << 16) | (0x88c4 >> 2),
677 	0x00000000,
678 	(0x0e00 << 16) | (0x88c8 >> 2),
679 	0x00000000,
680 	(0x0e00 << 16) | (0x88d0 >> 2),
681 	0x00000000,
682 	(0x0e00 << 16) | (0x88d4 >> 2),
683 	0x00000000,
684 	(0x0e00 << 16) | (0x88d8 >> 2),
685 	0x00000000,
686 	(0x0e00 << 16) | (0x8980 >> 2),
687 	0x00000000,
688 	(0x0e00 << 16) | (0x30938 >> 2),
689 	0x00000000,
690 	(0x0e00 << 16) | (0x3093c >> 2),
691 	0x00000000,
692 	(0x0e00 << 16) | (0x30940 >> 2),
693 	0x00000000,
694 	(0x0e00 << 16) | (0x89a0 >> 2),
695 	0x00000000,
696 	(0x0e00 << 16) | (0x30900 >> 2),
697 	0x00000000,
698 	(0x0e00 << 16) | (0x30904 >> 2),
699 	0x00000000,
700 	(0x0e00 << 16) | (0x89b4 >> 2),
701 	0x00000000,
702 	(0x0e00 << 16) | (0x3c210 >> 2),
703 	0x00000000,
704 	(0x0e00 << 16) | (0x3c214 >> 2),
705 	0x00000000,
706 	(0x0e00 << 16) | (0x3c218 >> 2),
707 	0x00000000,
708 	(0x0e00 << 16) | (0x8904 >> 2),
709 	0x00000000,
710 	0x5,
711 	(0x0e00 << 16) | (0x8c28 >> 2),
712 	(0x0e00 << 16) | (0x8c2c >> 2),
713 	(0x0e00 << 16) | (0x8c30 >> 2),
714 	(0x0e00 << 16) | (0x8c34 >> 2),
715 	(0x0e00 << 16) | (0x9600 >> 2),
716 };
717 
/*
 * Pack one selector word of the list below: @sel in the upper 16 bits,
 * @reg shifted right by two in the lower bits.
 * NOTE(review): @reg looks like a register byte offset that is turned into a
 * dword index - confirm against the code that consumes the list.
 */
#define KALINDI_RLC_SR_REG(sel, reg)	(((sel) << 16) | ((reg) >> 2))

/*
 * RLC save/restore register list ("kalindi" variant), 321 words.
 *
 * Layout as visible in the data:
 *   - 30 (selector word, 0x00000000) pairs
 *   - the literal word 0x3
 *   - 127 (selector word, 0x00000000) pairs
 *   - the literal word 0x5
 *   - 5 selector words without a following zero word
 * NOTE(review): the meaning of the 0x3 / 0x5 words is not visible here;
 * 0x5 matches the number of trailing entries - confirm.
 */
static const u32 kalindi_rlc_save_restore_register_list[] = {
	KALINDI_RLC_SR_REG(0x0e00, 0xc12c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc140), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc150), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc15c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc168), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc170), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc204), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc2b4), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc2b8), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc2bc), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc2c0), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8228), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x829c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x869c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0600, 0x98f4), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x98f8), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x9900), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc260), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x90e8), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x3c000), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x3c00c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8c1c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x9700), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xcd20), 0x00000000,
	KALINDI_RLC_SR_REG(0x4e00, 0xcd20), 0x00000000,
	KALINDI_RLC_SR_REG(0x5e00, 0xcd20), 0x00000000,
	KALINDI_RLC_SR_REG(0x6e00, 0xcd20), 0x00000000,
	KALINDI_RLC_SR_REG(0x7e00, 0xcd20), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x89bc), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8900), 0x00000000,
	/* literal marker word between the first and second run of pairs */
	0x3,
	KALINDI_RLC_SR_REG(0x0e00, 0xc130), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc134), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc1fc), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc208), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc264), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc268), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc26c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc270), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc274), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc28c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc290), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc294), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc298), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc2a0), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc2a4), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc2a8), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc2ac), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x301d0), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x30238), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x30250), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x30254), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x30258), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x3025c), 0x00000000,
	KALINDI_RLC_SR_REG(0x4e00, 0xc900), 0x00000000,
	KALINDI_RLC_SR_REG(0x5e00, 0xc900), 0x00000000,
	KALINDI_RLC_SR_REG(0x6e00, 0xc900), 0x00000000,
	KALINDI_RLC_SR_REG(0x7e00, 0xc900), 0x00000000,
	KALINDI_RLC_SR_REG(0x4e00, 0xc904), 0x00000000,
	KALINDI_RLC_SR_REG(0x5e00, 0xc904), 0x00000000,
	KALINDI_RLC_SR_REG(0x6e00, 0xc904), 0x00000000,
	KALINDI_RLC_SR_REG(0x7e00, 0xc904), 0x00000000,
	KALINDI_RLC_SR_REG(0x4e00, 0xc908), 0x00000000,
	KALINDI_RLC_SR_REG(0x5e00, 0xc908), 0x00000000,
	KALINDI_RLC_SR_REG(0x6e00, 0xc908), 0x00000000,
	KALINDI_RLC_SR_REG(0x7e00, 0xc908), 0x00000000,
	KALINDI_RLC_SR_REG(0x4e00, 0xc90c), 0x00000000,
	KALINDI_RLC_SR_REG(0x5e00, 0xc90c), 0x00000000,
	KALINDI_RLC_SR_REG(0x6e00, 0xc90c), 0x00000000,
	KALINDI_RLC_SR_REG(0x7e00, 0xc90c), 0x00000000,
	KALINDI_RLC_SR_REG(0x4e00, 0xc910), 0x00000000,
	KALINDI_RLC_SR_REG(0x5e00, 0xc910), 0x00000000,
	KALINDI_RLC_SR_REG(0x6e00, 0xc910), 0x00000000,
	KALINDI_RLC_SR_REG(0x7e00, 0xc910), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc99c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x9834), 0x00000000,
	KALINDI_RLC_SR_REG(0x0000, 0x30f00), 0x00000000,
	KALINDI_RLC_SR_REG(0x0000, 0x30f04), 0x00000000,
	KALINDI_RLC_SR_REG(0x0000, 0x30f08), 0x00000000,
	KALINDI_RLC_SR_REG(0x0000, 0x30f0c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0600, 0x9b7c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8a14), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8a18), 0x00000000,
	KALINDI_RLC_SR_REG(0x0600, 0x30a00), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8bf0), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8bcc), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8b24), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x30a04), 0x00000000,
	KALINDI_RLC_SR_REG(0x0600, 0x30a10), 0x00000000,
	KALINDI_RLC_SR_REG(0x0600, 0x30a14), 0x00000000,
	KALINDI_RLC_SR_REG(0x0600, 0x30a18), 0x00000000,
	KALINDI_RLC_SR_REG(0x0600, 0x30a2c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc700), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc704), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc708), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xc768), 0x00000000,
	KALINDI_RLC_SR_REG(0x0400, 0xc770), 0x00000000,
	KALINDI_RLC_SR_REG(0x0400, 0xc774), 0x00000000,
	KALINDI_RLC_SR_REG(0x0400, 0xc798), 0x00000000,
	KALINDI_RLC_SR_REG(0x0400, 0xc79c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x9100), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x3c010), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8c00), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8c04), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8c20), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8c38), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8c3c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xae00), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x9604), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac08), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac0c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac10), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac14), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac58), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac68), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac6c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac70), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac74), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac78), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac7c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac80), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac84), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac88), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xac8c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x970c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x9714), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x9718), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x971c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x31068), 0x00000000,
	KALINDI_RLC_SR_REG(0x4e00, 0x31068), 0x00000000,
	KALINDI_RLC_SR_REG(0x5e00, 0x31068), 0x00000000,
	KALINDI_RLC_SR_REG(0x6e00, 0x31068), 0x00000000,
	KALINDI_RLC_SR_REG(0x7e00, 0x31068), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xcd10), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0xcd14), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x88b0), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x88b4), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x88b8), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x88bc), 0x00000000,
	KALINDI_RLC_SR_REG(0x0400, 0x89c0), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x88c4), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x88c8), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x88d0), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x88d4), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x88d8), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8980), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x30938), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x3093c), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x30940), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x89a0), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x30900), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x30904), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x89b4), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x3e1fc), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x3c210), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x3c214), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x3c218), 0x00000000,
	KALINDI_RLC_SR_REG(0x0e00, 0x8904), 0x00000000,
	/* literal marker word; the remaining entries have no value slot */
	0x5,
	KALINDI_RLC_SR_REG(0x0e00, 0x8c28),
	KALINDI_RLC_SR_REG(0x0e00, 0x8c2c),
	KALINDI_RLC_SR_REG(0x0e00, 0x8c30),
	KALINDI_RLC_SR_REG(0x0e00, 0x8c34),
	KALINDI_RLC_SR_REG(0x0e00, 0x9600),
};

#undef KALINDI_RLC_SR_REG
1042 
/*
 * Bonaire "golden" SPM register setting: a single three-word entry.
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples - confirm against the code that programs this table.
 */
static const u32 bonaire_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1047 
/*
 * Bonaire "golden" common register settings: four three-word entries.
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples - confirm against the code that programs this table.
 */
static const u32 bonaire_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1055 
/*
 * Bonaire "golden" register settings: 41 three-word entries.
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples - confirm against the code that programs this table.
 */
static const u32 bonaire_golden_registers[] = {
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032,
};
1100 
/*
 * Bonaire MGCG/CGCG init sequence.  This is the first table programmed
 * for CHIP_BONAIRE by cik_init_golden_registers().
 * NOTE(review): judging by the name these are clock-gating init values,
 * and entries are presumably (register offset, mask, value) triples as
 * consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1186 
/*
 * Spectre SPM golden setting.  Programmed last for CHIP_KAVERI by
 * cik_init_golden_registers().
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples as consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1191 
/*
 * Spectre "common" golden settings.  Programmed for CHIP_KAVERI by
 * cik_init_golden_registers() after spectre_golden_registers.
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples as consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1199 
/*
 * Spectre golden register settings.  Programmed for CHIP_KAVERI by
 * cik_init_golden_registers() right after spectre_mgcg_cgcg_init.
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples as consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1228 
/*
 * Spectre MGCG/CGCG init sequence.  This is the first table programmed
 * for CHIP_KAVERI by cik_init_golden_registers().
 * NOTE(review): judging by the name these are clock-gating init values,
 * and entries are presumably (register offset, mask, value) triples as
 * consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1319 
/*
 * Kalindi SPM golden setting.  Programmed last for both CHIP_KABINI and
 * CHIP_MULLINS by cik_init_golden_registers().
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples as consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1324 
/*
 * Kalindi "common" golden settings.  Programmed for both CHIP_KABINI and
 * CHIP_MULLINS by cik_init_golden_registers().
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples as consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1332 
/*
 * Kalindi golden register settings.  Programmed for CHIP_KABINI only by
 * cik_init_golden_registers() (CHIP_MULLINS uses godavari_golden_registers
 * in this slot instead).
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples as consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1366 
/*
 * Kalindi MGCG/CGCG init sequence.  This is the first table programmed
 * for both CHIP_KABINI and CHIP_MULLINS by cik_init_golden_registers().
 * NOTE(review): judging by the name these are clock-gating init values,
 * and entries are presumably (register offset, mask, value) triples as
 * consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1425 
/*
 * Hawaii SPM golden setting.  Programmed last for CHIP_HAWAII by
 * cik_init_golden_registers().
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples as consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1430 
/*
 * Hawaii "common" golden settings.  Programmed for CHIP_HAWAII by
 * cik_init_golden_registers() after hawaii_golden_registers.
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples as consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1439 
/*
 * Hawaii golden register settings.  Programmed for CHIP_HAWAII by
 * cik_init_golden_registers() right after hawaii_mgcg_cgcg_init.
 * NOTE(review): entries are presumably (register offset, mask, value)
 * triples as consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1479 
/*
 * Hawaii MGCG/CGCG init sequence.  This is the first table programmed
 * for CHIP_HAWAII by cik_init_golden_registers().
 * NOTE(review): judging by the name these are clock-gating init values,
 * and entries are presumably (register offset, mask, value) triples as
 * consumed by radeon_program_register_sequence() - confirm.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1590 
1591 static const u32 godavari_golden_registers[] =
1592 {
1593 	0x55e4, 0xff607fff, 0xfc000100,
1594 	0x6ed8, 0x00010101, 0x00010000,
1595 	0x9830, 0xffffffff, 0x00000000,
1596 	0x98302, 0xf00fffff, 0x00000400,
1597 	0x6130, 0xffffffff, 0x00010000,
1598 	0x5bb0, 0x000000f0, 0x00000070,
1599 	0x5bc0, 0xf0311fff, 0x80300000,
1600 	0x98f8, 0x73773777, 0x12010001,
1601 	0x98fc, 0xffffffff, 0x00000010,
1602 	0x8030, 0x00001f0f, 0x0000100a,
1603 	0x2f48, 0x73773777, 0x12010001,
1604 	0x2408, 0x000fffff, 0x000c007f,
1605 	0x8a14, 0xf000003f, 0x00000007,
1606 	0x8b24, 0xffffffff, 0x00ff0fff,
1607 	0x30a04, 0x0000ff0f, 0x00000000,
1608 	0x28a4c, 0x07ffffff, 0x06000000,
1609 	0x4d8, 0x00000fff, 0x00000100,
1610 	0xd014, 0x00010000, 0x00810001,
1611 	0xd814, 0x00010000, 0x00810001,
1612 	0x3e78, 0x00000001, 0x00000002,
1613 	0xc768, 0x00000008, 0x00000008,
1614 	0xc770, 0x00000f00, 0x00000800,
1615 	0xc774, 0x00000f00, 0x00000800,
1616 	0xc798, 0x00ffffff, 0x00ff7fbf,
1617 	0xc79c, 0x00ffffff, 0x00ff7faf,
1618 	0x8c00, 0x000000ff, 0x00000001,
1619 	0x214f8, 0x01ff01ff, 0x00000002,
1620 	0x21498, 0x007ff800, 0x00200000,
1621 	0x2015c, 0xffffffff, 0x00000f40,
1622 	0x88c4, 0x001f3ae3, 0x00000082,
1623 	0x88d4, 0x0000001f, 0x00000010,
1624 	0x30934, 0xffffffff, 0x00000000
1625 };
1626 
1627 
1628 static void cik_init_golden_registers(struct radeon_device *rdev)
1629 {
1630 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1631 	mutex_lock(&rdev->grbm_idx_mutex);
1632 	switch (rdev->family) {
1633 	case CHIP_BONAIRE:
1634 		radeon_program_register_sequence(rdev,
1635 						 bonaire_mgcg_cgcg_init,
1636 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1637 		radeon_program_register_sequence(rdev,
1638 						 bonaire_golden_registers,
1639 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1640 		radeon_program_register_sequence(rdev,
1641 						 bonaire_golden_common_registers,
1642 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1643 		radeon_program_register_sequence(rdev,
1644 						 bonaire_golden_spm_registers,
1645 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1646 		break;
1647 	case CHIP_KABINI:
1648 		radeon_program_register_sequence(rdev,
1649 						 kalindi_mgcg_cgcg_init,
1650 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1651 		radeon_program_register_sequence(rdev,
1652 						 kalindi_golden_registers,
1653 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1654 		radeon_program_register_sequence(rdev,
1655 						 kalindi_golden_common_registers,
1656 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1657 		radeon_program_register_sequence(rdev,
1658 						 kalindi_golden_spm_registers,
1659 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1660 		break;
1661 	case CHIP_MULLINS:
1662 		radeon_program_register_sequence(rdev,
1663 						 kalindi_mgcg_cgcg_init,
1664 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1665 		radeon_program_register_sequence(rdev,
1666 						 godavari_golden_registers,
1667 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1668 		radeon_program_register_sequence(rdev,
1669 						 kalindi_golden_common_registers,
1670 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1671 		radeon_program_register_sequence(rdev,
1672 						 kalindi_golden_spm_registers,
1673 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1674 		break;
1675 	case CHIP_KAVERI:
1676 		radeon_program_register_sequence(rdev,
1677 						 spectre_mgcg_cgcg_init,
1678 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1679 		radeon_program_register_sequence(rdev,
1680 						 spectre_golden_registers,
1681 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1682 		radeon_program_register_sequence(rdev,
1683 						 spectre_golden_common_registers,
1684 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1685 		radeon_program_register_sequence(rdev,
1686 						 spectre_golden_spm_registers,
1687 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1688 		break;
1689 	case CHIP_HAWAII:
1690 		radeon_program_register_sequence(rdev,
1691 						 hawaii_mgcg_cgcg_init,
1692 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1693 		radeon_program_register_sequence(rdev,
1694 						 hawaii_golden_registers,
1695 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1696 		radeon_program_register_sequence(rdev,
1697 						 hawaii_golden_common_registers,
1698 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1699 		radeon_program_register_sequence(rdev,
1700 						 hawaii_golden_spm_registers,
1701 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1702 		break;
1703 	default:
1704 		break;
1705 	}
1706 	mutex_unlock(&rdev->grbm_idx_mutex);
1707 }
1708 
1709 /**
1710  * cik_get_xclk - get the xclk
1711  *
1712  * @rdev: radeon_device pointer
1713  *
1714  * Returns the reference clock used by the gfx engine
1715  * (CIK).
1716  */
1717 u32 cik_get_xclk(struct radeon_device *rdev)
1718 {
1719 	u32 reference_clock = rdev->clock.spll.reference_freq;
1720 
1721 	if (rdev->flags & RADEON_IS_IGP) {
1722 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1723 			return reference_clock / 2;
1724 	} else {
1725 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1726 			return reference_clock / 4;
1727 	}
1728 	return reference_clock;
1729 }
1730 
1731 /**
1732  * cik_mm_rdoorbell - read a doorbell dword
1733  *
1734  * @rdev: radeon_device pointer
1735  * @index: doorbell index
1736  *
1737  * Returns the value in the doorbell aperture at the
1738  * requested doorbell index (CIK).
1739  */
1740 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1741 {
1742 	if (index < rdev->doorbell.num_doorbells) {
1743 		return readl(rdev->doorbell.ptr + index);
1744 	} else {
1745 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1746 		return 0;
1747 	}
1748 }
1749 
1750 /**
1751  * cik_mm_wdoorbell - write a doorbell dword
1752  *
1753  * @rdev: radeon_device pointer
1754  * @index: doorbell index
1755  * @v: value to write
1756  *
1757  * Writes @v to the doorbell aperture at the
1758  * requested doorbell index (CIK).
1759  */
1760 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1761 {
1762 	if (index < rdev->doorbell.num_doorbells) {
1763 		writel(v, rdev->doorbell.ptr + index);
1764 	} else {
1765 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1766 	}
1767 }
1768 
/* Number of {index, data} rows in bonaire_io_mc_regs. */
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * Built-in MC IO debug table for Bonaire.  ci_mc_load_microcode() uses it
 * only on the legacy (non-new_fw) firmware path: for each row the first
 * word is written to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1810 
/* Number of {index, data} rows in hawaii_io_mc_regs. */
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * Built-in MC IO debug table for Hawaii.  ci_mc_load_microcode() uses it
 * only on the legacy (non-new_fw) firmware path: for each row the first
 * word is written to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1838 
1839 
1840 /**
1841  * cik_srbm_select - select specific register instances
1842  *
1843  * @rdev: radeon_device pointer
1844  * @me: selected ME (micro engine)
1845  * @pipe: pipe
1846  * @queue: queue
1847  * @vmid: VMID
1848  *
1849  * Switches the currently active registers instances.  Some
1850  * registers are instanced per VMID, others are instanced per
1851  * me/pipe/queue combination.
1852  */
1853 static void cik_srbm_select(struct radeon_device *rdev,
1854 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1855 {
1856 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1857 			     MEID(me & 0x3) |
1858 			     VMID(vmid & 0xf) |
1859 			     QUEUEID(queue & 0x7));
1860 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1861 }
1862 
1863 /* ucode loading */
1864 /**
1865  * ci_mc_load_microcode - load MC ucode into the hw
1866  *
1867  * @rdev: radeon_device pointer
1868  *
1869  * Load the GDDR MC ucode into the hw (CIK).
1870  * Returns 0 on success, error on failure.
1871  */
1872 int ci_mc_load_microcode(struct radeon_device *rdev)
1873 {
1874 	const __be32 *fw_data = NULL;
1875 	const __le32 *new_fw_data = NULL;
1876 	u32 running, tmp;
1877 	u32 *io_mc_regs = NULL;
1878 	const __le32 *new_io_mc_regs = NULL;
1879 	int i, regs_size, ucode_size;
1880 
1881 	if (!rdev->mc_fw)
1882 		return -EINVAL;
1883 
1884 	if (rdev->new_fw) {
1885 		const struct mc_firmware_header_v1_0 *hdr =
1886 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1887 
1888 		radeon_ucode_print_mc_hdr(&hdr->header);
1889 
1890 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1891 		new_io_mc_regs = (const __le32 *)
1892 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1893 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1894 		new_fw_data = (const __le32 *)
1895 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1896 	} else {
1897 		ucode_size = rdev->mc_fw->size / 4;
1898 
1899 		switch (rdev->family) {
1900 		case CHIP_BONAIRE:
1901 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1902 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1903 			break;
1904 		case CHIP_HAWAII:
1905 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1906 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1907 			break;
1908 		default:
1909 			return -EINVAL;
1910 		}
1911 		fw_data = (const __be32 *)rdev->mc_fw->data;
1912 	}
1913 
1914 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1915 
1916 	if (running == 0) {
1917 		/* reset the engine and set to writable */
1918 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1919 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1920 
1921 		/* load mc io regs */
1922 		for (i = 0; i < regs_size; i++) {
1923 			if (rdev->new_fw) {
1924 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1925 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1926 			} else {
1927 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1928 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1929 			}
1930 		}
1931 
1932 		tmp = RREG32(MC_SEQ_MISC0);
1933 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1934 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1935 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1936 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1937 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1938 		}
1939 
1940 		/* load the MC ucode */
1941 		for (i = 0; i < ucode_size; i++) {
1942 			if (rdev->new_fw)
1943 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1944 			else
1945 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1946 		}
1947 
1948 		/* put the engine back into the active state */
1949 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1950 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1951 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1952 
1953 		/* wait for training to complete */
1954 		for (i = 0; i < rdev->usec_timeout; i++) {
1955 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1956 				break;
1957 			udelay(1);
1958 		}
1959 		for (i = 0; i < rdev->usec_timeout; i++) {
1960 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1961 				break;
1962 			udelay(1);
1963 		}
1964 	}
1965 
1966 	return 0;
1967 }
1968 
1969 /**
1970  * cik_init_microcode - load ucode images from disk
1971  *
1972  * @rdev: radeon_device pointer
1973  *
1974  * Use the firmware interface to load the ucode images into
1975  * the driver (not loaded into hw).
1976  * Returns 0 on success, error on failure.
1977  */
1978 static int cik_init_microcode(struct radeon_device *rdev)
1979 {
1980 	const char *chip_name;
1981 	const char *new_chip_name;
1982 	size_t pfp_req_size, me_req_size, ce_req_size,
1983 		mec_req_size, rlc_req_size, mc_req_size = 0,
1984 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1985 	char fw_name[30];
1986 	int new_fw = 0;
1987 	int err;
1988 	int num_fw;
1989 	bool new_smc = false;
1990 
1991 	DRM_DEBUG("\n");
1992 
1993 	switch (rdev->family) {
1994 	case CHIP_BONAIRE:
1995 		chip_name = "BONAIRE";
1996 		if ((rdev->pdev->revision == 0x80) ||
1997 		    (rdev->pdev->revision == 0x81) ||
1998 		    (rdev->pdev->device == 0x665f))
1999 			new_smc = true;
2000 		new_chip_name = "bonaire";
2001 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2002 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2003 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2004 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2005 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2006 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2007 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2008 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2009 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2010 		num_fw = 8;
2011 		break;
2012 	case CHIP_HAWAII:
2013 		chip_name = "HAWAII";
2014 		if (rdev->pdev->revision == 0x80)
2015 			new_smc = true;
2016 		new_chip_name = "hawaii";
2017 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2018 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2019 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2020 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2021 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2022 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2023 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2024 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2026 		num_fw = 8;
2027 		break;
2028 	case CHIP_KAVERI:
2029 		chip_name = "KAVERI";
2030 		new_chip_name = "kaveri";
2031 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2032 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2033 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2034 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2035 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2036 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2037 		num_fw = 7;
2038 		break;
2039 	case CHIP_KABINI:
2040 		chip_name = "KABINI";
2041 		new_chip_name = "kabini";
2042 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2043 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2044 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2045 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2046 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2047 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2048 		num_fw = 6;
2049 		break;
2050 	case CHIP_MULLINS:
2051 		chip_name = "MULLINS";
2052 		new_chip_name = "mullins";
2053 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2054 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2055 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2056 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2057 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2058 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2059 		num_fw = 6;
2060 		break;
2061 	default: BUG();
2062 	}
2063 
2064 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2065 
2066 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2067 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068 	if (err) {
2069 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2070 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2071 		if (err)
2072 			goto out;
2073 		if (rdev->pfp_fw->size != pfp_req_size) {
2074 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2075 			       rdev->pfp_fw->size, fw_name);
2076 			err = -EINVAL;
2077 			goto out;
2078 		}
2079 	} else {
2080 		err = radeon_ucode_validate(rdev->pfp_fw);
2081 		if (err) {
2082 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2083 			       fw_name);
2084 			goto out;
2085 		} else {
2086 			new_fw++;
2087 		}
2088 	}
2089 
2090 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2091 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092 	if (err) {
2093 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2094 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2095 		if (err)
2096 			goto out;
2097 		if (rdev->me_fw->size != me_req_size) {
2098 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->me_fw->size, fw_name);
2100 			err = -EINVAL;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->me_fw);
2104 		if (err) {
2105 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2106 			       fw_name);
2107 			goto out;
2108 		} else {
2109 			new_fw++;
2110 		}
2111 	}
2112 
2113 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2114 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2115 	if (err) {
2116 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2117 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2118 		if (err)
2119 			goto out;
2120 		if (rdev->ce_fw->size != ce_req_size) {
2121 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2122 			       rdev->ce_fw->size, fw_name);
2123 			err = -EINVAL;
2124 		}
2125 	} else {
2126 		err = radeon_ucode_validate(rdev->ce_fw);
2127 		if (err) {
2128 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2129 			       fw_name);
2130 			goto out;
2131 		} else {
2132 			new_fw++;
2133 		}
2134 	}
2135 
2136 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2137 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2138 	if (err) {
2139 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2140 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141 		if (err)
2142 			goto out;
2143 		if (rdev->mec_fw->size != mec_req_size) {
2144 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2145 			       rdev->mec_fw->size, fw_name);
2146 			err = -EINVAL;
2147 		}
2148 	} else {
2149 		err = radeon_ucode_validate(rdev->mec_fw);
2150 		if (err) {
2151 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2152 			       fw_name);
2153 			goto out;
2154 		} else {
2155 			new_fw++;
2156 		}
2157 	}
2158 
2159 	if (rdev->family == CHIP_KAVERI) {
2160 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2161 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2162 		if (err) {
2163 			goto out;
2164 		} else {
2165 			err = radeon_ucode_validate(rdev->mec2_fw);
2166 			if (err) {
2167 				goto out;
2168 			} else {
2169 				new_fw++;
2170 			}
2171 		}
2172 	}
2173 
2174 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2175 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2176 	if (err) {
2177 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2178 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2179 		if (err)
2180 			goto out;
2181 		if (rdev->rlc_fw->size != rlc_req_size) {
2182 			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2183 			       rdev->rlc_fw->size, fw_name);
2184 			err = -EINVAL;
2185 		}
2186 	} else {
2187 		err = radeon_ucode_validate(rdev->rlc_fw);
2188 		if (err) {
2189 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2190 			       fw_name);
2191 			goto out;
2192 		} else {
2193 			new_fw++;
2194 		}
2195 	}
2196 
2197 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2198 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2199 	if (err) {
2200 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2201 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2202 		if (err)
2203 			goto out;
2204 		if (rdev->sdma_fw->size != sdma_req_size) {
2205 			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2206 			       rdev->sdma_fw->size, fw_name);
2207 			err = -EINVAL;
2208 		}
2209 	} else {
2210 		err = radeon_ucode_validate(rdev->sdma_fw);
2211 		if (err) {
2212 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2213 			       fw_name);
2214 			goto out;
2215 		} else {
2216 			new_fw++;
2217 		}
2218 	}
2219 
2220 	/* No SMC, MC ucode on APUs */
2221 	if (!(rdev->flags & RADEON_IS_IGP)) {
2222 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2223 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2224 		if (err) {
2225 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2226 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2227 			if (err) {
2228 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2229 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2230 				if (err)
2231 					goto out;
2232 			}
2233 			if ((rdev->mc_fw->size != mc_req_size) &&
2234 			    (rdev->mc_fw->size != mc2_req_size)){
2235 				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2236 				       rdev->mc_fw->size, fw_name);
2237 				err = -EINVAL;
2238 			}
2239 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2240 		} else {
2241 			err = radeon_ucode_validate(rdev->mc_fw);
2242 			if (err) {
2243 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2244 				       fw_name);
2245 				goto out;
2246 			} else {
2247 				new_fw++;
2248 			}
2249 		}
2250 
2251 		if (new_smc)
2252 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2253 		else
2254 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2255 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2256 		if (err) {
2257 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2258 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2259 			if (err) {
2260 				pr_err("smc: error loading firmware \"%s\"\n",
2261 				       fw_name);
2262 				release_firmware(rdev->smc_fw);
2263 				rdev->smc_fw = NULL;
2264 				err = 0;
2265 			} else if (rdev->smc_fw->size != smc_req_size) {
2266 				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2267 				       rdev->smc_fw->size, fw_name);
2268 				err = -EINVAL;
2269 			}
2270 		} else {
2271 			err = radeon_ucode_validate(rdev->smc_fw);
2272 			if (err) {
2273 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2274 				       fw_name);
2275 				goto out;
2276 			} else {
2277 				new_fw++;
2278 			}
2279 		}
2280 	}
2281 
2282 	if (new_fw == 0) {
2283 		rdev->new_fw = false;
2284 	} else if (new_fw < num_fw) {
2285 		pr_err("ci_fw: mixing new and old firmware!\n");
2286 		err = -EINVAL;
2287 	} else {
2288 		rdev->new_fw = true;
2289 	}
2290 
2291 out:
2292 	if (err) {
2293 		if (err != -EINVAL)
2294 			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2295 			       fw_name);
2296 		release_firmware(rdev->pfp_fw);
2297 		rdev->pfp_fw = NULL;
2298 		release_firmware(rdev->me_fw);
2299 		rdev->me_fw = NULL;
2300 		release_firmware(rdev->ce_fw);
2301 		rdev->ce_fw = NULL;
2302 		release_firmware(rdev->mec_fw);
2303 		rdev->mec_fw = NULL;
2304 		release_firmware(rdev->mec2_fw);
2305 		rdev->mec2_fw = NULL;
2306 		release_firmware(rdev->rlc_fw);
2307 		rdev->rlc_fw = NULL;
2308 		release_firmware(rdev->sdma_fw);
2309 		rdev->sdma_fw = NULL;
2310 		release_firmware(rdev->mc_fw);
2311 		rdev->mc_fw = NULL;
2312 		release_firmware(rdev->smc_fw);
2313 		rdev->smc_fw = NULL;
2314 	}
2315 	return err;
2316 }
2317 
2318 /*
2319  * Core functions
2320  */
2321 /**
2322  * cik_tiling_mode_table_init - init the hw tiling table
2323  *
2324  * @rdev: radeon_device pointer
2325  *
2326  * Starting with SI, the tiling setup is done globally in a
2327  * set of 32 tiling modes.  Rather than selecting each set of
2328  * parameters per surface as on older asics, we just select
2329  * which index in the tiling table we want to use, and the
2330  * surface uses those parameters (CIK).
2331  */
2332 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2333 {
2334 	u32 *tile = rdev->config.cik.tile_mode_array;
2335 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2336 	const u32 num_tile_mode_states =
2337 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2338 	const u32 num_secondary_tile_mode_states =
2339 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2340 	u32 reg_offset, split_equal_to_row_size;
2341 	u32 num_pipe_configs;
2342 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2343 		rdev->config.cik.max_shader_engines;
2344 
2345 	switch (rdev->config.cik.mem_row_size_in_kb) {
2346 	case 1:
2347 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2348 		break;
2349 	case 2:
2350 	default:
2351 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2352 		break;
2353 	case 4:
2354 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2355 		break;
2356 	}
2357 
2358 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2359 	if (num_pipe_configs > 8)
2360 		num_pipe_configs = 16;
2361 
2362 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2363 		tile[reg_offset] = 0;
2364 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2365 		macrotile[reg_offset] = 0;
2366 
2367 	switch(num_pipe_configs) {
2368 	case 16:
2369 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2373 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2377 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2379 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2381 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2383 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2385 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 			   TILE_SPLIT(split_equal_to_row_size));
2389 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2392 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2393 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2396 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2397 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2398 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399 			   TILE_SPLIT(split_equal_to_row_size));
2400 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2401 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2402 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2405 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2407 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2410 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2411 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2412 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2413 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2414 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2415 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2418 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2420 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2426 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2427 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2429 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2433 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2435 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2436 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2437 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2439 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2442 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2444 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2445 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2446 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447 
2448 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451 			   NUM_BANKS(ADDR_SURF_16_BANK));
2452 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2454 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455 			   NUM_BANKS(ADDR_SURF_16_BANK));
2456 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459 			   NUM_BANKS(ADDR_SURF_16_BANK));
2460 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463 			   NUM_BANKS(ADDR_SURF_16_BANK));
2464 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467 			   NUM_BANKS(ADDR_SURF_8_BANK));
2468 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471 			   NUM_BANKS(ADDR_SURF_4_BANK));
2472 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475 			   NUM_BANKS(ADDR_SURF_2_BANK));
2476 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479 			   NUM_BANKS(ADDR_SURF_16_BANK));
2480 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2482 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2483 			   NUM_BANKS(ADDR_SURF_16_BANK));
2484 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487 			    NUM_BANKS(ADDR_SURF_16_BANK));
2488 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491 			    NUM_BANKS(ADDR_SURF_8_BANK));
2492 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2495 			    NUM_BANKS(ADDR_SURF_4_BANK));
2496 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499 			    NUM_BANKS(ADDR_SURF_2_BANK));
2500 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503 			    NUM_BANKS(ADDR_SURF_2_BANK));
2504 
2505 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2506 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2507 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2508 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2509 		break;
2510 
2511 	case 8:
2512 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2516 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2520 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2522 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2524 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2528 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 			   TILE_SPLIT(split_equal_to_row_size));
2532 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2536 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2537 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2539 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2540 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2541 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542 			   TILE_SPLIT(split_equal_to_row_size));
2543 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2544 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2545 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2548 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2550 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2553 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2554 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2555 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2556 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2557 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2558 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2560 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2561 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2563 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2567 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2568 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2570 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2571 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2572 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2575 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2576 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2578 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2580 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2582 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2583 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2584 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2585 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2586 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2587 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2590 
2591 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2593 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2594 				NUM_BANKS(ADDR_SURF_16_BANK));
2595 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2597 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2598 				NUM_BANKS(ADDR_SURF_16_BANK));
2599 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2602 				NUM_BANKS(ADDR_SURF_16_BANK));
2603 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2606 				NUM_BANKS(ADDR_SURF_16_BANK));
2607 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2609 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2610 				NUM_BANKS(ADDR_SURF_8_BANK));
2611 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2613 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2614 				NUM_BANKS(ADDR_SURF_4_BANK));
2615 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2618 				NUM_BANKS(ADDR_SURF_2_BANK));
2619 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2621 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2622 				NUM_BANKS(ADDR_SURF_16_BANK));
2623 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2625 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2626 				NUM_BANKS(ADDR_SURF_16_BANK));
2627 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2629 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2630 				NUM_BANKS(ADDR_SURF_16_BANK));
2631 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2634 				NUM_BANKS(ADDR_SURF_16_BANK));
2635 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2637 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2638 				NUM_BANKS(ADDR_SURF_8_BANK));
2639 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642 				NUM_BANKS(ADDR_SURF_4_BANK));
2643 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2646 				NUM_BANKS(ADDR_SURF_2_BANK));
2647 
2648 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2649 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2650 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2651 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2652 		break;
2653 
2654 	case 4:
2655 		if (num_rbs == 4) {
2656 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2660 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2664 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2666 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2668 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2672 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 			   TILE_SPLIT(split_equal_to_row_size));
2676 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2677 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2679 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2680 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2681 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2683 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2684 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2685 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686 			   TILE_SPLIT(split_equal_to_row_size));
2687 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2688 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2689 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2692 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2694 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2697 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2699 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2700 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2701 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2704 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2707 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2716 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2717 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2722 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2723 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2728 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2731 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2732 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734 
2735 		} else if (num_rbs < 4) {
2736 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2740 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2744 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2746 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2748 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2750 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2752 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755 			   TILE_SPLIT(split_equal_to_row_size));
2756 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2757 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2759 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2760 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2761 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2762 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2763 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2764 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2765 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2766 			   TILE_SPLIT(split_equal_to_row_size));
2767 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2768 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2769 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2770 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2772 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2773 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2774 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2775 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2776 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2777 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2778 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2779 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2780 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2781 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2782 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2784 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2785 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2787 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2789 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2790 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2791 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2792 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2793 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2794 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2796 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2797 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2800 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2802 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2803 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2804 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2805 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2806 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2807 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2808 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2809 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2810 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2811 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2812 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2814 		}
2815 
2816 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2819 				NUM_BANKS(ADDR_SURF_16_BANK));
2820 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2822 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2823 				NUM_BANKS(ADDR_SURF_16_BANK));
2824 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827 				NUM_BANKS(ADDR_SURF_16_BANK));
2828 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831 				NUM_BANKS(ADDR_SURF_16_BANK));
2832 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2834 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2835 				NUM_BANKS(ADDR_SURF_16_BANK));
2836 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839 				NUM_BANKS(ADDR_SURF_8_BANK));
2840 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2843 				NUM_BANKS(ADDR_SURF_4_BANK));
2844 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2845 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2846 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847 				NUM_BANKS(ADDR_SURF_16_BANK));
2848 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2849 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851 				NUM_BANKS(ADDR_SURF_16_BANK));
2852 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2854 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2855 				NUM_BANKS(ADDR_SURF_16_BANK));
2856 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2858 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859 				NUM_BANKS(ADDR_SURF_16_BANK));
2860 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2862 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2863 				NUM_BANKS(ADDR_SURF_16_BANK));
2864 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2866 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2867 				NUM_BANKS(ADDR_SURF_8_BANK));
2868 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2871 				NUM_BANKS(ADDR_SURF_4_BANK));
2872 
2873 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2874 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2875 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2876 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2877 		break;
2878 
2879 	case 2:
2880 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882 			   PIPE_CONFIG(ADDR_SURF_P2) |
2883 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2884 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886 			   PIPE_CONFIG(ADDR_SURF_P2) |
2887 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2888 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2890 			   PIPE_CONFIG(ADDR_SURF_P2) |
2891 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2892 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894 			   PIPE_CONFIG(ADDR_SURF_P2) |
2895 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2896 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898 			   PIPE_CONFIG(ADDR_SURF_P2) |
2899 			   TILE_SPLIT(split_equal_to_row_size));
2900 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2901 			   PIPE_CONFIG(ADDR_SURF_P2) |
2902 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2904 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2905 			   PIPE_CONFIG(ADDR_SURF_P2) |
2906 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2907 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2908 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2909 			   PIPE_CONFIG(ADDR_SURF_P2) |
2910 			   TILE_SPLIT(split_equal_to_row_size));
2911 		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2912 			   PIPE_CONFIG(ADDR_SURF_P2);
2913 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2914 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915 			   PIPE_CONFIG(ADDR_SURF_P2));
2916 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2918 			    PIPE_CONFIG(ADDR_SURF_P2) |
2919 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2920 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2922 			    PIPE_CONFIG(ADDR_SURF_P2) |
2923 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2924 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2925 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2926 			    PIPE_CONFIG(ADDR_SURF_P2) |
2927 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2928 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2929 			    PIPE_CONFIG(ADDR_SURF_P2) |
2930 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2931 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2932 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2933 			    PIPE_CONFIG(ADDR_SURF_P2) |
2934 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2935 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2936 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2937 			    PIPE_CONFIG(ADDR_SURF_P2) |
2938 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2940 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2941 			    PIPE_CONFIG(ADDR_SURF_P2) |
2942 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2944 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2945 			    PIPE_CONFIG(ADDR_SURF_P2));
2946 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2947 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2948 			    PIPE_CONFIG(ADDR_SURF_P2) |
2949 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2951 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2952 			    PIPE_CONFIG(ADDR_SURF_P2) |
2953 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2955 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2956 			    PIPE_CONFIG(ADDR_SURF_P2) |
2957 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958 
2959 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2960 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2961 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962 				NUM_BANKS(ADDR_SURF_16_BANK));
2963 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2964 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2965 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966 				NUM_BANKS(ADDR_SURF_16_BANK));
2967 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2969 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970 				NUM_BANKS(ADDR_SURF_16_BANK));
2971 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2974 				NUM_BANKS(ADDR_SURF_16_BANK));
2975 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2977 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978 				NUM_BANKS(ADDR_SURF_16_BANK));
2979 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2981 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982 				NUM_BANKS(ADDR_SURF_16_BANK));
2983 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2986 				NUM_BANKS(ADDR_SURF_8_BANK));
2987 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2988 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2989 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990 				NUM_BANKS(ADDR_SURF_16_BANK));
2991 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2992 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2993 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994 				NUM_BANKS(ADDR_SURF_16_BANK));
2995 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2996 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2997 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998 				NUM_BANKS(ADDR_SURF_16_BANK));
2999 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3000 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3001 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002 				NUM_BANKS(ADDR_SURF_16_BANK));
3003 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3005 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006 				NUM_BANKS(ADDR_SURF_16_BANK));
3007 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3009 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010 				NUM_BANKS(ADDR_SURF_16_BANK));
3011 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3012 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3013 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3014 				NUM_BANKS(ADDR_SURF_8_BANK));
3015 
3016 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3017 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3018 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3019 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3020 		break;
3021 
3022 	default:
3023 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3024 	}
3025 }
3026 
3027 /**
3028  * cik_select_se_sh - select which SE, SH to address
3029  *
3030  * @rdev: radeon_device pointer
3031  * @se_num: shader engine to address
3032  * @sh_num: sh block to address
3033  *
3034  * Select which SE, SH combinations to address. Certain
3035  * registers are instanced per SE or SH.  0xffffffff means
3036  * broadcast to all SEs or SHs (CIK).
3037  */
3038 static void cik_select_se_sh(struct radeon_device *rdev,
3039 			     u32 se_num, u32 sh_num)
3040 {
3041 	u32 data = INSTANCE_BROADCAST_WRITES;
3042 
3043 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3044 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3045 	else if (se_num == 0xffffffff)
3046 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3047 	else if (sh_num == 0xffffffff)
3048 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3049 	else
3050 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3051 	WREG32(GRBM_GFX_INDEX, data);
3052 }
3053 
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: number of low-order bits to set
 *
 * Builds a mask with the lowest @bit_width bits set (CIK).  A width
 * of 0 yields 0; a width of 32 or more yields 0xffffffff.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* Shifting a 32-bit value by 32 or more is undefined, so saturate. */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3072 
3073 /**
3074  * cik_get_rb_disabled - computes the mask of disabled RBs
3075  *
3076  * @rdev: radeon_device pointer
3077  * @max_rb_num: max RBs (render backends) for the asic
3078  * @se_num: number of SEs (shader engines) for the asic
3079  * @sh_per_se: number of SH blocks per SE for the asic
3080  *
3081  * Calculates the bitmask of disabled RBs (CIK).
3082  * Returns the disabled RB bitmask.
3083  */
3084 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3085 			      u32 max_rb_num_per_se,
3086 			      u32 sh_per_se)
3087 {
3088 	u32 data, mask;
3089 
3090 	data = RREG32(CC_RB_BACKEND_DISABLE);
3091 	if (data & 1)
3092 		data &= BACKEND_DISABLE_MASK;
3093 	else
3094 		data = 0;
3095 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3096 
3097 	data >>= BACKEND_DISABLE_SHIFT;
3098 
3099 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3100 
3101 	return data & mask;
3102 }
3103 
3104 /**
3105  * cik_setup_rb - setup the RBs on the asic
3106  *
3107  * @rdev: radeon_device pointer
3108  * @se_num: number of SEs (shader engines) for the asic
3109  * @sh_per_se: number of SH blocks per SE for the asic
3110  * @max_rb_num: max RBs (render backends) for the asic
3111  *
3112  * Configures per-SE/SH RB registers (CIK).
3113  */
3114 static void cik_setup_rb(struct radeon_device *rdev,
3115 			 u32 se_num, u32 sh_per_se,
3116 			 u32 max_rb_num_per_se)
3117 {
3118 	int i, j;
3119 	u32 data, mask;
3120 	u32 disabled_rbs = 0;
3121 	u32 enabled_rbs = 0;
3122 
3123 	mutex_lock(&rdev->grbm_idx_mutex);
3124 	for (i = 0; i < se_num; i++) {
3125 		for (j = 0; j < sh_per_se; j++) {
3126 			cik_select_se_sh(rdev, i, j);
3127 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3128 			if (rdev->family == CHIP_HAWAII)
3129 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3130 			else
3131 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3132 		}
3133 	}
3134 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3135 	mutex_unlock(&rdev->grbm_idx_mutex);
3136 
3137 	mask = 1;
3138 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3139 		if (!(disabled_rbs & mask))
3140 			enabled_rbs |= mask;
3141 		mask <<= 1;
3142 	}
3143 
3144 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3145 
3146 	mutex_lock(&rdev->grbm_idx_mutex);
3147 	for (i = 0; i < se_num; i++) {
3148 		cik_select_se_sh(rdev, i, 0xffffffff);
3149 		data = 0;
3150 		for (j = 0; j < sh_per_se; j++) {
3151 			switch (enabled_rbs & 3) {
3152 			case 0:
3153 				if (j == 0)
3154 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3155 				else
3156 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3157 				break;
3158 			case 1:
3159 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3160 				break;
3161 			case 2:
3162 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3163 				break;
3164 			case 3:
3165 			default:
3166 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3167 				break;
3168 			}
3169 			enabled_rbs >>= 2;
3170 		}
3171 		WREG32(PA_SC_RASTER_CONFIG, data);
3172 	}
3173 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3174 	mutex_unlock(&rdev->grbm_idx_mutex);
3175 }
3176 
3177 /**
3178  * cik_gpu_init - setup the 3D engine
3179  *
3180  * @rdev: radeon_device pointer
3181  *
3182  * Configures the 3D engine and tiling configuration
3183  * registers so that the 3D engine is usable.
3184  */
3185 static void cik_gpu_init(struct radeon_device *rdev)
3186 {
3187 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3188 	u32 mc_shared_chmap, mc_arb_ramcfg;
3189 	u32 hdp_host_path_cntl;
3190 	u32 tmp;
3191 	int i, j;
3192 
3193 	switch (rdev->family) {
3194 	case CHIP_BONAIRE:
3195 		rdev->config.cik.max_shader_engines = 2;
3196 		rdev->config.cik.max_tile_pipes = 4;
3197 		rdev->config.cik.max_cu_per_sh = 7;
3198 		rdev->config.cik.max_sh_per_se = 1;
3199 		rdev->config.cik.max_backends_per_se = 2;
3200 		rdev->config.cik.max_texture_channel_caches = 4;
3201 		rdev->config.cik.max_gprs = 256;
3202 		rdev->config.cik.max_gs_threads = 32;
3203 		rdev->config.cik.max_hw_contexts = 8;
3204 
3205 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3206 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3207 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3208 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3209 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3210 		break;
3211 	case CHIP_HAWAII:
3212 		rdev->config.cik.max_shader_engines = 4;
3213 		rdev->config.cik.max_tile_pipes = 16;
3214 		rdev->config.cik.max_cu_per_sh = 11;
3215 		rdev->config.cik.max_sh_per_se = 1;
3216 		rdev->config.cik.max_backends_per_se = 4;
3217 		rdev->config.cik.max_texture_channel_caches = 16;
3218 		rdev->config.cik.max_gprs = 256;
3219 		rdev->config.cik.max_gs_threads = 32;
3220 		rdev->config.cik.max_hw_contexts = 8;
3221 
3222 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3223 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3224 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3225 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3226 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3227 		break;
3228 	case CHIP_KAVERI:
3229 		rdev->config.cik.max_shader_engines = 1;
3230 		rdev->config.cik.max_tile_pipes = 4;
3231 		if ((rdev->pdev->device == 0x1304) ||
3232 		    (rdev->pdev->device == 0x1305) ||
3233 		    (rdev->pdev->device == 0x130C) ||
3234 		    (rdev->pdev->device == 0x130F) ||
3235 		    (rdev->pdev->device == 0x1310) ||
3236 		    (rdev->pdev->device == 0x1311) ||
3237 		    (rdev->pdev->device == 0x131C)) {
3238 			rdev->config.cik.max_cu_per_sh = 8;
3239 			rdev->config.cik.max_backends_per_se = 2;
3240 		} else if ((rdev->pdev->device == 0x1309) ||
3241 			   (rdev->pdev->device == 0x130A) ||
3242 			   (rdev->pdev->device == 0x130D) ||
3243 			   (rdev->pdev->device == 0x1313) ||
3244 			   (rdev->pdev->device == 0x131D)) {
3245 			rdev->config.cik.max_cu_per_sh = 6;
3246 			rdev->config.cik.max_backends_per_se = 2;
3247 		} else if ((rdev->pdev->device == 0x1306) ||
3248 			   (rdev->pdev->device == 0x1307) ||
3249 			   (rdev->pdev->device == 0x130B) ||
3250 			   (rdev->pdev->device == 0x130E) ||
3251 			   (rdev->pdev->device == 0x1315) ||
3252 			   (rdev->pdev->device == 0x1318) ||
3253 			   (rdev->pdev->device == 0x131B)) {
3254 			rdev->config.cik.max_cu_per_sh = 4;
3255 			rdev->config.cik.max_backends_per_se = 1;
3256 		} else {
3257 			rdev->config.cik.max_cu_per_sh = 3;
3258 			rdev->config.cik.max_backends_per_se = 1;
3259 		}
3260 		rdev->config.cik.max_sh_per_se = 1;
3261 		rdev->config.cik.max_texture_channel_caches = 4;
3262 		rdev->config.cik.max_gprs = 256;
3263 		rdev->config.cik.max_gs_threads = 16;
3264 		rdev->config.cik.max_hw_contexts = 8;
3265 
3266 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3267 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3268 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3269 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3270 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3271 		break;
3272 	case CHIP_KABINI:
3273 	case CHIP_MULLINS:
3274 	default:
3275 		rdev->config.cik.max_shader_engines = 1;
3276 		rdev->config.cik.max_tile_pipes = 2;
3277 		rdev->config.cik.max_cu_per_sh = 2;
3278 		rdev->config.cik.max_sh_per_se = 1;
3279 		rdev->config.cik.max_backends_per_se = 1;
3280 		rdev->config.cik.max_texture_channel_caches = 2;
3281 		rdev->config.cik.max_gprs = 256;
3282 		rdev->config.cik.max_gs_threads = 16;
3283 		rdev->config.cik.max_hw_contexts = 8;
3284 
3285 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3286 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3287 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3288 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3289 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3290 		break;
3291 	}
3292 
3293 	/* Initialize HDP */
3294 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3295 		WREG32((0x2c14 + j), 0x00000000);
3296 		WREG32((0x2c18 + j), 0x00000000);
3297 		WREG32((0x2c1c + j), 0x00000000);
3298 		WREG32((0x2c20 + j), 0x00000000);
3299 		WREG32((0x2c24 + j), 0x00000000);
3300 	}
3301 
3302 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3303 	WREG32(SRBM_INT_CNTL, 0x1);
3304 	WREG32(SRBM_INT_ACK, 0x1);
3305 
3306 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3307 
3308 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3309 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3310 
3311 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3312 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3313 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3314 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3315 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3316 		rdev->config.cik.mem_row_size_in_kb = 4;
3317 	/* XXX use MC settings? */
3318 	rdev->config.cik.shader_engine_tile_size = 32;
3319 	rdev->config.cik.num_gpus = 1;
3320 	rdev->config.cik.multi_gpu_tile_size = 64;
3321 
3322 	/* fix up row size */
3323 	gb_addr_config &= ~ROW_SIZE_MASK;
3324 	switch (rdev->config.cik.mem_row_size_in_kb) {
3325 	case 1:
3326 	default:
3327 		gb_addr_config |= ROW_SIZE(0);
3328 		break;
3329 	case 2:
3330 		gb_addr_config |= ROW_SIZE(1);
3331 		break;
3332 	case 4:
3333 		gb_addr_config |= ROW_SIZE(2);
3334 		break;
3335 	}
3336 
3337 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3338 	 * not have bank info, so create a custom tiling dword.
3339 	 * bits 3:0   num_pipes
3340 	 * bits 7:4   num_banks
3341 	 * bits 11:8  group_size
3342 	 * bits 15:12 row_size
3343 	 */
3344 	rdev->config.cik.tile_config = 0;
3345 	switch (rdev->config.cik.num_tile_pipes) {
3346 	case 1:
3347 		rdev->config.cik.tile_config |= (0 << 0);
3348 		break;
3349 	case 2:
3350 		rdev->config.cik.tile_config |= (1 << 0);
3351 		break;
3352 	case 4:
3353 		rdev->config.cik.tile_config |= (2 << 0);
3354 		break;
3355 	case 8:
3356 	default:
3357 		/* XXX what about 12? */
3358 		rdev->config.cik.tile_config |= (3 << 0);
3359 		break;
3360 	}
3361 	rdev->config.cik.tile_config |=
3362 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3363 	rdev->config.cik.tile_config |=
3364 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3365 	rdev->config.cik.tile_config |=
3366 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3367 
3368 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3369 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3370 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3371 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3372 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3373 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3374 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3375 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3376 
3377 	cik_tiling_mode_table_init(rdev);
3378 
3379 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3380 		     rdev->config.cik.max_sh_per_se,
3381 		     rdev->config.cik.max_backends_per_se);
3382 
3383 	rdev->config.cik.active_cus = 0;
3384 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3385 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3386 			rdev->config.cik.active_cus +=
3387 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3388 		}
3389 	}
3390 
3391 	/* set HW defaults for 3D engine */
3392 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3393 
3394 	mutex_lock(&rdev->grbm_idx_mutex);
3395 	/*
3396 	 * making sure that the following register writes will be broadcasted
3397 	 * to all the shaders
3398 	 */
3399 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3400 	WREG32(SX_DEBUG_1, 0x20);
3401 
3402 	WREG32(TA_CNTL_AUX, 0x00010000);
3403 
3404 	tmp = RREG32(SPI_CONFIG_CNTL);
3405 	tmp |= 0x03000000;
3406 	WREG32(SPI_CONFIG_CNTL, tmp);
3407 
3408 	WREG32(SQ_CONFIG, 1);
3409 
3410 	WREG32(DB_DEBUG, 0);
3411 
3412 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3413 	tmp |= 0x00000400;
3414 	WREG32(DB_DEBUG2, tmp);
3415 
3416 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3417 	tmp |= 0x00020200;
3418 	WREG32(DB_DEBUG3, tmp);
3419 
3420 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3421 	tmp |= 0x00018208;
3422 	WREG32(CB_HW_CONTROL, tmp);
3423 
3424 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3425 
3426 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3427 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3428 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3429 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3430 
3431 	WREG32(VGT_NUM_INSTANCES, 1);
3432 
3433 	WREG32(CP_PERFMON_CNTL, 0);
3434 
3435 	WREG32(SQ_CONFIG, 0);
3436 
3437 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3438 					  FORCE_EOV_MAX_REZ_CNT(255)));
3439 
3440 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3441 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3442 
3443 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3444 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3445 
3446 	tmp = RREG32(HDP_MISC_CNTL);
3447 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3448 	WREG32(HDP_MISC_CNTL, tmp);
3449 
3450 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3451 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3452 
3453 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3454 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3455 	mutex_unlock(&rdev->grbm_idx_mutex);
3456 
3457 	udelay(50);
3458 }
3459 
/*
 * GPU scratch register helper functions.
 */
3463 /**
3464  * cik_scratch_init - setup driver info for CP scratch regs
3465  *
3466  * @rdev: radeon_device pointer
3467  *
3468  * Set up the number and offset of the CP scratch registers.
3469  * NOTE: use of CP scratch registers is a legacy inferface and
3470  * is not used by default on newer asics (r6xx+).  On newer asics,
3471  * memory buffers are used for fences rather than scratch regs.
3472  */
3473 static void cik_scratch_init(struct radeon_device *rdev)
3474 {
3475 	int i;
3476 
3477 	rdev->scratch.num_reg = 7;
3478 	rdev->scratch.reg_base = SCRATCH_REG0;
3479 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3480 		rdev->scratch.free[i] = true;
3481 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3482 	}
3483 }
3484 
3485 /**
3486  * cik_ring_test - basic gfx ring test
3487  *
3488  * @rdev: radeon_device pointer
3489  * @ring: radeon_ring structure holding ring information
3490  *
3491  * Allocate a scratch register and write to it using the gfx ring (CIK).
3492  * Provides a basic gfx ring test to verify that the ring is working.
3493  * Used by cik_cp_gfx_resume();
3494  * Returns 0 on success, error on failure.
3495  */
3496 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3497 {
3498 	uint32_t scratch;
3499 	uint32_t tmp = 0;
3500 	unsigned i;
3501 	int r;
3502 
3503 	r = radeon_scratch_get(rdev, &scratch);
3504 	if (r) {
3505 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3506 		return r;
3507 	}
3508 	WREG32(scratch, 0xCAFEDEAD);
3509 	r = radeon_ring_lock(rdev, ring, 3);
3510 	if (r) {
3511 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3512 		radeon_scratch_free(rdev, scratch);
3513 		return r;
3514 	}
3515 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3516 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3517 	radeon_ring_write(ring, 0xDEADBEEF);
3518 	radeon_ring_unlock_commit(rdev, ring, false);
3519 
3520 	for (i = 0; i < rdev->usec_timeout; i++) {
3521 		tmp = RREG32(scratch);
3522 		if (tmp == 0xDEADBEEF)
3523 			break;
3524 		DRM_UDELAY(1);
3525 	}
3526 	if (i < rdev->usec_timeout) {
3527 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3528 	} else {
3529 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3530 			  ring->idx, scratch, tmp);
3531 		r = -EINVAL;
3532 	}
3533 	radeon_scratch_free(rdev, scratch);
3534 	return r;
3535 }
3536 
3537 /**
3538  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3539  *
3540  * @rdev: radeon_device pointer
3541  * @ridx: radeon ring index
3542  *
3543  * Emits an hdp flush on the cp.
3544  */
3545 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3546 				       int ridx)
3547 {
3548 	struct radeon_ring *ring = &rdev->ring[ridx];
3549 	u32 ref_and_mask;
3550 
3551 	switch (ring->idx) {
3552 	case CAYMAN_RING_TYPE_CP1_INDEX:
3553 	case CAYMAN_RING_TYPE_CP2_INDEX:
3554 	default:
3555 		switch (ring->me) {
3556 		case 0:
3557 			ref_and_mask = CP2 << ring->pipe;
3558 			break;
3559 		case 1:
3560 			ref_and_mask = CP6 << ring->pipe;
3561 			break;
3562 		default:
3563 			return;
3564 		}
3565 		break;
3566 	case RADEON_RING_TYPE_GFX_INDEX:
3567 		ref_and_mask = CP0;
3568 		break;
3569 	}
3570 
3571 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3572 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3573 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3574 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3575 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3576 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3577 	radeon_ring_write(ring, ref_and_mask);
3578 	radeon_ring_write(ring, ref_and_mask);
3579 	radeon_ring_write(ring, 0x20); /* poll interval */
3580 }
3581 
3582 /**
3583  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3584  *
3585  * @rdev: radeon_device pointer
3586  * @fence: radeon fence object
3587  *
3588  * Emits a fence sequnce number on the gfx ring and flushes
3589  * GPU caches.
3590  */
3591 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3592 			     struct radeon_fence *fence)
3593 {
3594 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3595 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3596 
3597 	/* Workaround for cache flush problems. First send a dummy EOP
3598 	 * event down the pipe with seq one below.
3599 	 */
3600 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3601 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3602 				 EOP_TC_ACTION_EN |
3603 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3604 				 EVENT_INDEX(5)));
3605 	radeon_ring_write(ring, addr & 0xfffffffc);
3606 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3607 				DATA_SEL(1) | INT_SEL(0));
3608 	radeon_ring_write(ring, fence->seq - 1);
3609 	radeon_ring_write(ring, 0);
3610 
3611 	/* Then send the real EOP event down the pipe. */
3612 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3613 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3614 				 EOP_TC_ACTION_EN |
3615 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3616 				 EVENT_INDEX(5)));
3617 	radeon_ring_write(ring, addr & 0xfffffffc);
3618 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3619 	radeon_ring_write(ring, fence->seq);
3620 	radeon_ring_write(ring, 0);
3621 }
3622 
3623 /**
3624  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3625  *
3626  * @rdev: radeon_device pointer
3627  * @fence: radeon fence object
3628  *
3629  * Emits a fence sequnce number on the compute ring and flushes
3630  * GPU caches.
3631  */
3632 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3633 				 struct radeon_fence *fence)
3634 {
3635 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3636 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3637 
3638 	/* RELEASE_MEM - flush caches, send int */
3639 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3640 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3641 				 EOP_TC_ACTION_EN |
3642 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3643 				 EVENT_INDEX(5)));
3644 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3645 	radeon_ring_write(ring, addr & 0xfffffffc);
3646 	radeon_ring_write(ring, upper_32_bits(addr));
3647 	radeon_ring_write(ring, fence->seq);
3648 	radeon_ring_write(ring, 0);
3649 }
3650 
3651 /**
3652  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3653  *
3654  * @rdev: radeon_device pointer
3655  * @ring: radeon ring buffer object
3656  * @semaphore: radeon semaphore object
3657  * @emit_wait: Is this a sempahore wait?
3658  *
3659  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3660  * from running ahead of semaphore waits.
3661  */
3662 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3663 			     struct radeon_ring *ring,
3664 			     struct radeon_semaphore *semaphore,
3665 			     bool emit_wait)
3666 {
3667 	uint64_t addr = semaphore->gpu_addr;
3668 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3669 
3670 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3671 	radeon_ring_write(ring, lower_32_bits(addr));
3672 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3673 
3674 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3675 		/* Prevent the PFP from running ahead of the semaphore wait */
3676 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3677 		radeon_ring_write(ring, 0x0);
3678 	}
3679 
3680 	return true;
3681 }
3682 
3683 /**
3684  * cik_copy_cpdma - copy pages using the CP DMA engine
3685  *
3686  * @rdev: radeon_device pointer
3687  * @src_offset: src GPU address
3688  * @dst_offset: dst GPU address
3689  * @num_gpu_pages: number of GPU pages to xfer
3690  * @resv: reservation object to sync to
3691  *
3692  * Copy GPU paging using the CP DMA engine (CIK+).
3693  * Used by the radeon ttm implementation to move pages if
3694  * registered as the asic copy callback.
3695  */
3696 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3697 				    uint64_t src_offset, uint64_t dst_offset,
3698 				    unsigned num_gpu_pages,
3699 				    struct reservation_object *resv)
3700 {
3701 	struct radeon_fence *fence;
3702 	struct radeon_sync sync;
3703 	int ring_index = rdev->asic->copy.blit_ring_index;
3704 	struct radeon_ring *ring = &rdev->ring[ring_index];
3705 	u32 size_in_bytes, cur_size_in_bytes, control;
3706 	int i, num_loops;
3707 	int r = 0;
3708 
3709 	radeon_sync_create(&sync);
3710 
3711 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3712 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3713 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3714 	if (r) {
3715 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3716 		radeon_sync_free(rdev, &sync, NULL);
3717 		return ERR_PTR(r);
3718 	}
3719 
3720 	radeon_sync_resv(rdev, &sync, resv, false);
3721 	radeon_sync_rings(rdev, &sync, ring->idx);
3722 
3723 	for (i = 0; i < num_loops; i++) {
3724 		cur_size_in_bytes = size_in_bytes;
3725 		if (cur_size_in_bytes > 0x1fffff)
3726 			cur_size_in_bytes = 0x1fffff;
3727 		size_in_bytes -= cur_size_in_bytes;
3728 		control = 0;
3729 		if (size_in_bytes == 0)
3730 			control |= PACKET3_DMA_DATA_CP_SYNC;
3731 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3732 		radeon_ring_write(ring, control);
3733 		radeon_ring_write(ring, lower_32_bits(src_offset));
3734 		radeon_ring_write(ring, upper_32_bits(src_offset));
3735 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3736 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3737 		radeon_ring_write(ring, cur_size_in_bytes);
3738 		src_offset += cur_size_in_bytes;
3739 		dst_offset += cur_size_in_bytes;
3740 	}
3741 
3742 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3743 	if (r) {
3744 		radeon_ring_unlock_undo(rdev, ring);
3745 		radeon_sync_free(rdev, &sync, NULL);
3746 		return ERR_PTR(r);
3747 	}
3748 
3749 	radeon_ring_unlock_commit(rdev, ring, false);
3750 	radeon_sync_free(rdev, &sync, fence);
3751 
3752 	return fence;
3753 }
3754 
3755 /*
3756  * IB stuff
3757  */
3758 /**
3759  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3760  *
3761  * @rdev: radeon_device pointer
3762  * @ib: radeon indirect buffer object
3763  *
3764  * Emits a DE (drawing engine) or CE (constant engine) IB
3765  * on the gfx ring.  IBs are usually generated by userspace
3766  * acceleration drivers and submitted to the kernel for
3767  * scheduling on the ring.  This function schedules the IB
3768  * on the gfx ring for execution by the GPU.
3769  */
3770 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3771 {
3772 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3773 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3774 	u32 header, control = INDIRECT_BUFFER_VALID;
3775 
3776 	if (ib->is_const_ib) {
3777 		/* set switch buffer packet before const IB */
3778 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3779 		radeon_ring_write(ring, 0);
3780 
3781 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3782 	} else {
3783 		u32 next_rptr;
3784 		if (ring->rptr_save_reg) {
3785 			next_rptr = ring->wptr + 3 + 4;
3786 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3787 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3788 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3789 			radeon_ring_write(ring, next_rptr);
3790 		} else if (rdev->wb.enabled) {
3791 			next_rptr = ring->wptr + 5 + 4;
3792 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3793 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3794 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3795 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3796 			radeon_ring_write(ring, next_rptr);
3797 		}
3798 
3799 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3800 	}
3801 
3802 	control |= ib->length_dw | (vm_id << 24);
3803 
3804 	radeon_ring_write(ring, header);
3805 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3806 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3807 	radeon_ring_write(ring, control);
3808 }
3809 
3810 /**
3811  * cik_ib_test - basic gfx ring IB test
3812  *
3813  * @rdev: radeon_device pointer
3814  * @ring: radeon_ring structure holding ring information
3815  *
3816  * Allocate an IB and execute it on the gfx ring (CIK).
3817  * Provides a basic gfx ring test to verify that IBs are working.
3818  * Returns 0 on success, error on failure.
3819  */
3820 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3821 {
3822 	struct radeon_ib ib;
3823 	uint32_t scratch;
3824 	uint32_t tmp = 0;
3825 	unsigned i;
3826 	int r;
3827 
3828 	r = radeon_scratch_get(rdev, &scratch);
3829 	if (r) {
3830 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3831 		return r;
3832 	}
3833 	WREG32(scratch, 0xCAFEDEAD);
3834 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3835 	if (r) {
3836 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3837 		radeon_scratch_free(rdev, scratch);
3838 		return r;
3839 	}
3840 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3841 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3842 	ib.ptr[2] = 0xDEADBEEF;
3843 	ib.length_dw = 3;
3844 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3845 	if (r) {
3846 		radeon_scratch_free(rdev, scratch);
3847 		radeon_ib_free(rdev, &ib);
3848 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3849 		return r;
3850 	}
3851 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3852 		RADEON_USEC_IB_TEST_TIMEOUT));
3853 	if (r < 0) {
3854 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3855 		radeon_scratch_free(rdev, scratch);
3856 		radeon_ib_free(rdev, &ib);
3857 		return r;
3858 	} else if (r == 0) {
3859 		DRM_ERROR("radeon: fence wait timed out.\n");
3860 		radeon_scratch_free(rdev, scratch);
3861 		radeon_ib_free(rdev, &ib);
3862 		return -ETIMEDOUT;
3863 	}
3864 	r = 0;
3865 	for (i = 0; i < rdev->usec_timeout; i++) {
3866 		tmp = RREG32(scratch);
3867 		if (tmp == 0xDEADBEEF)
3868 			break;
3869 		DRM_UDELAY(1);
3870 	}
3871 	if (i < rdev->usec_timeout) {
3872 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3873 	} else {
3874 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3875 			  scratch, tmp);
3876 		r = -EINVAL;
3877 	}
3878 	radeon_scratch_free(rdev, scratch);
3879 	radeon_ib_free(rdev, &ib);
3880 	return r;
3881 }
3882 
3883 /*
3884  * CP.
 * On CIK, gfx and compute now have independent command processors.
3886  *
3887  * GFX
3888  * Gfx consists of a single ring and can process both gfx jobs and
3889  * compute jobs.  The gfx CP consists of three microengines (ME):
3890  * PFP - Pre-Fetch Parser
3891  * ME - Micro Engine
3892  * CE - Constant Engine
3893  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3895  * used by the DE so that they can be loaded into cache in parallel
3896  * while the DE is processing state update packets.
3897  *
3898  * Compute
3899  * The compute CP consists of two microengines (ME):
3900  * MEC1 - Compute MicroEngine 1
3901  * MEC2 - Compute MicroEngine 2
3902  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3903  * The queues are exposed to userspace and are programmed directly
3904  * by the compute runtime.
3905  */
3906 /**
3907  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3908  *
3909  * @rdev: radeon_device pointer
3910  * @enable: enable or disable the MEs
3911  *
3912  * Halts or unhalts the gfx MEs.
3913  */
3914 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3915 {
3916 	if (enable)
3917 		WREG32(CP_ME_CNTL, 0);
3918 	else {
3919 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3920 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3921 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3922 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3923 	}
3924 	udelay(50);
3925 }
3926 
3927 /**
3928  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3929  *
3930  * @rdev: radeon_device pointer
3931  *
3932  * Loads the gfx PFP, ME, and CE ucode.
3933  * Returns 0 for success, -EINVAL if the ucode is not available.
3934  */
3935 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3936 {
3937 	int i;
3938 
3939 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3940 		return -EINVAL;
3941 
3942 	cik_cp_gfx_enable(rdev, false);
3943 
3944 	if (rdev->new_fw) {
3945 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3946 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3947 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3948 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3949 		const struct gfx_firmware_header_v1_0 *me_hdr =
3950 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3951 		const __le32 *fw_data;
3952 		u32 fw_size;
3953 
3954 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3955 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3956 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3957 
3958 		/* PFP */
3959 		fw_data = (const __le32 *)
3960 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3961 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3962 		WREG32(CP_PFP_UCODE_ADDR, 0);
3963 		for (i = 0; i < fw_size; i++)
3964 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3965 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3966 
3967 		/* CE */
3968 		fw_data = (const __le32 *)
3969 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3970 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3971 		WREG32(CP_CE_UCODE_ADDR, 0);
3972 		for (i = 0; i < fw_size; i++)
3973 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3974 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3975 
3976 		/* ME */
3977 		fw_data = (const __be32 *)
3978 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3979 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3980 		WREG32(CP_ME_RAM_WADDR, 0);
3981 		for (i = 0; i < fw_size; i++)
3982 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3983 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3984 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3985 	} else {
3986 		const __be32 *fw_data;
3987 
3988 		/* PFP */
3989 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3990 		WREG32(CP_PFP_UCODE_ADDR, 0);
3991 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3992 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3993 		WREG32(CP_PFP_UCODE_ADDR, 0);
3994 
3995 		/* CE */
3996 		fw_data = (const __be32 *)rdev->ce_fw->data;
3997 		WREG32(CP_CE_UCODE_ADDR, 0);
3998 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3999 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4000 		WREG32(CP_CE_UCODE_ADDR, 0);
4001 
4002 		/* ME */
4003 		fw_data = (const __be32 *)rdev->me_fw->data;
4004 		WREG32(CP_ME_RAM_WADDR, 0);
4005 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4006 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4007 		WREG32(CP_ME_RAM_WADDR, 0);
4008 	}
4009 
4010 	return 0;
4011 }
4012 
4013 /**
4014  * cik_cp_gfx_start - start the gfx ring
4015  *
4016  * @rdev: radeon_device pointer
4017  *
4018  * Enables the ring and loads the clear state context and other
4019  * packets required to init the ring.
4020  * Returns 0 for success, error for failure.
4021  */
4022 static int cik_cp_gfx_start(struct radeon_device *rdev)
4023 {
4024 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4025 	int r, i;
4026 
4027 	/* init the CP */
4028 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4029 	WREG32(CP_ENDIAN_SWAP, 0);
4030 	WREG32(CP_DEVICE_ID, 1);
4031 
4032 	cik_cp_gfx_enable(rdev, true);
4033 
4034 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4035 	if (r) {
4036 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4037 		return r;
4038 	}
4039 
4040 	/* init the CE partitions.  CE only used for gfx on CIK */
4041 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4042 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4043 	radeon_ring_write(ring, 0x8000);
4044 	radeon_ring_write(ring, 0x8000);
4045 
4046 	/* setup clear context state */
4047 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4048 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4049 
4050 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4051 	radeon_ring_write(ring, 0x80000000);
4052 	radeon_ring_write(ring, 0x80000000);
4053 
4054 	for (i = 0; i < cik_default_size; i++)
4055 		radeon_ring_write(ring, cik_default_state[i]);
4056 
4057 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4058 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4059 
4060 	/* set clear context state */
4061 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4062 	radeon_ring_write(ring, 0);
4063 
4064 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4065 	radeon_ring_write(ring, 0x00000316);
4066 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4067 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4068 
4069 	radeon_ring_unlock_commit(rdev, ring, false);
4070 
4071 	return 0;
4072 }
4073 
4074 /**
4075  * cik_cp_gfx_fini - stop the gfx ring
4076  *
4077  * @rdev: radeon_device pointer
4078  *
4079  * Stop the gfx ring and tear down the driver ring
4080  * info.
4081  */
4082 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4083 {
4084 	cik_cp_gfx_enable(rdev, false);
4085 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4086 }
4087 
4088 /**
4089  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4090  *
4091  * @rdev: radeon_device pointer
4092  *
4093  * Program the location and size of the gfx ring buffer
4094  * and test it to make sure it's working.
4095  * Returns 0 for success, error for failure.
4096  */
4097 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4098 {
4099 	struct radeon_ring *ring;
4100 	u32 tmp;
4101 	u32 rb_bufsz;
4102 	u64 rb_addr;
4103 	int r;
4104 
4105 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4106 	if (rdev->family != CHIP_HAWAII)
4107 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4108 
4109 	/* Set the write pointer delay */
4110 	WREG32(CP_RB_WPTR_DELAY, 0);
4111 
4112 	/* set the RB to use vmid 0 */
4113 	WREG32(CP_RB_VMID, 0);
4114 
4115 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4116 
4117 	/* ring 0 - compute and gfx */
4118 	/* Set ring buffer size */
4119 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4120 	rb_bufsz = order_base_2(ring->ring_size / 8);
4121 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4122 #ifdef __BIG_ENDIAN
4123 	tmp |= BUF_SWAP_32BIT;
4124 #endif
4125 	WREG32(CP_RB0_CNTL, tmp);
4126 
4127 	/* Initialize the ring buffer's read and write pointers */
4128 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4129 	ring->wptr = 0;
4130 	WREG32(CP_RB0_WPTR, ring->wptr);
4131 
4132 	/* set the wb address wether it's enabled or not */
4133 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4134 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4135 
4136 	/* scratch register shadowing is no longer supported */
4137 	WREG32(SCRATCH_UMSK, 0);
4138 
4139 	if (!rdev->wb.enabled)
4140 		tmp |= RB_NO_UPDATE;
4141 
4142 	mdelay(1);
4143 	WREG32(CP_RB0_CNTL, tmp);
4144 
4145 	rb_addr = ring->gpu_addr >> 8;
4146 	WREG32(CP_RB0_BASE, rb_addr);
4147 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4148 
4149 	/* start the ring */
4150 	cik_cp_gfx_start(rdev);
4151 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4152 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4153 	if (r) {
4154 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4155 		return r;
4156 	}
4157 
4158 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4159 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4160 
4161 	return 0;
4162 }
4163 
4164 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4165 		     struct radeon_ring *ring)
4166 {
4167 	u32 rptr;
4168 
4169 	if (rdev->wb.enabled)
4170 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4171 	else
4172 		rptr = RREG32(CP_RB0_RPTR);
4173 
4174 	return rptr;
4175 }
4176 
4177 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4178 		     struct radeon_ring *ring)
4179 {
4180 	return RREG32(CP_RB0_WPTR);
4181 }
4182 
4183 void cik_gfx_set_wptr(struct radeon_device *rdev,
4184 		      struct radeon_ring *ring)
4185 {
4186 	WREG32(CP_RB0_WPTR, ring->wptr);
4187 	(void)RREG32(CP_RB0_WPTR);
4188 }
4189 
4190 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4191 			 struct radeon_ring *ring)
4192 {
4193 	u32 rptr;
4194 
4195 	if (rdev->wb.enabled) {
4196 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4197 	} else {
4198 		mutex_lock(&rdev->srbm_mutex);
4199 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4200 		rptr = RREG32(CP_HQD_PQ_RPTR);
4201 		cik_srbm_select(rdev, 0, 0, 0, 0);
4202 		mutex_unlock(&rdev->srbm_mutex);
4203 	}
4204 
4205 	return rptr;
4206 }
4207 
4208 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4209 			 struct radeon_ring *ring)
4210 {
4211 	u32 wptr;
4212 
4213 	if (rdev->wb.enabled) {
4214 		/* XXX check if swapping is necessary on BE */
4215 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4216 	} else {
4217 		mutex_lock(&rdev->srbm_mutex);
4218 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4219 		wptr = RREG32(CP_HQD_PQ_WPTR);
4220 		cik_srbm_select(rdev, 0, 0, 0, 0);
4221 		mutex_unlock(&rdev->srbm_mutex);
4222 	}
4223 
4224 	return wptr;
4225 }
4226 
4227 void cik_compute_set_wptr(struct radeon_device *rdev,
4228 			  struct radeon_ring *ring)
4229 {
4230 	/* XXX check if swapping is necessary on BE */
4231 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4232 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4233 }
4234 
4235 static void cik_compute_stop(struct radeon_device *rdev,
4236 			     struct radeon_ring *ring)
4237 {
4238 	u32 j, tmp;
4239 
4240 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4241 	/* Disable wptr polling. */
4242 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4243 	tmp &= ~WPTR_POLL_EN;
4244 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4245 	/* Disable HQD. */
4246 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4247 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4248 		for (j = 0; j < rdev->usec_timeout; j++) {
4249 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4250 				break;
4251 			udelay(1);
4252 		}
4253 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4254 		WREG32(CP_HQD_PQ_RPTR, 0);
4255 		WREG32(CP_HQD_PQ_WPTR, 0);
4256 	}
4257 	cik_srbm_select(rdev, 0, 0, 0, 0);
4258 }
4259 
4260 /**
4261  * cik_cp_compute_enable - enable/disable the compute CP MEs
4262  *
4263  * @rdev: radeon_device pointer
4264  * @enable: enable or disable the MEs
4265  *
4266  * Halts or unhalts the compute MEs.
4267  */
4268 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4269 {
4270 	if (enable)
4271 		WREG32(CP_MEC_CNTL, 0);
4272 	else {
4273 		/*
4274 		 * To make hibernation reliable we need to clear compute ring
4275 		 * configuration before halting the compute ring.
4276 		 */
4277 		mutex_lock(&rdev->srbm_mutex);
4278 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4279 		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4280 		mutex_unlock(&rdev->srbm_mutex);
4281 
4282 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4283 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4284 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4285 	}
4286 	udelay(50);
4287 }
4288 
4289 /**
4290  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4291  *
4292  * @rdev: radeon_device pointer
4293  *
4294  * Loads the compute MEC1&2 ucode.
4295  * Returns 0 for success, -EINVAL if the ucode is not available.
4296  */
4297 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4298 {
4299 	int i;
4300 
4301 	if (!rdev->mec_fw)
4302 		return -EINVAL;
4303 
4304 	cik_cp_compute_enable(rdev, false);
4305 
4306 	if (rdev->new_fw) {
4307 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4308 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4309 		const __le32 *fw_data;
4310 		u32 fw_size;
4311 
4312 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4313 
4314 		/* MEC1 */
4315 		fw_data = (const __le32 *)
4316 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4317 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4318 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4319 		for (i = 0; i < fw_size; i++)
4320 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4321 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4322 
4323 		/* MEC2 */
4324 		if (rdev->family == CHIP_KAVERI) {
4325 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4326 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4327 
4328 			fw_data = (const __le32 *)
4329 				(rdev->mec2_fw->data +
4330 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4331 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4332 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4333 			for (i = 0; i < fw_size; i++)
4334 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4335 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4336 		}
4337 	} else {
4338 		const __be32 *fw_data;
4339 
4340 		/* MEC1 */
4341 		fw_data = (const __be32 *)rdev->mec_fw->data;
4342 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4343 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4344 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4345 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4346 
4347 		if (rdev->family == CHIP_KAVERI) {
4348 			/* MEC2 */
4349 			fw_data = (const __be32 *)rdev->mec_fw->data;
4350 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4351 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4352 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4353 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4354 		}
4355 	}
4356 
4357 	return 0;
4358 }
4359 
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues by unhalting the compute MEs.
 * Returns 0 for success, error for failure (this implementation
 * always returns 0).
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4374 
4375 /**
4376  * cik_cp_compute_fini - stop the compute queues
4377  *
4378  * @rdev: radeon_device pointer
4379  *
4380  * Stop the compute queues and tear down the driver queue
4381  * info.
4382  */
4383 static void cik_cp_compute_fini(struct radeon_device *rdev)
4384 {
4385 	int i, idx, r;
4386 
4387 	cik_cp_compute_enable(rdev, false);
4388 
4389 	for (i = 0; i < 2; i++) {
4390 		if (i == 0)
4391 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4392 		else
4393 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4394 
4395 		if (rdev->ring[idx].mqd_obj) {
4396 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4397 			if (unlikely(r != 0))
4398 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4399 
4400 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4401 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4402 
4403 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4404 			rdev->ring[idx].mqd_obj = NULL;
4405 		}
4406 	}
4407 }
4408 
4409 static void cik_mec_fini(struct radeon_device *rdev)
4410 {
4411 	int r;
4412 
4413 	if (rdev->mec.hpd_eop_obj) {
4414 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4415 		if (unlikely(r != 0))
4416 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4417 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4418 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4419 
4420 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4421 		rdev->mec.hpd_eop_obj = NULL;
4422 	}
4423 }
4424 
4425 #define MEC_HPD_SIZE 2048
4426 
4427 static int cik_mec_init(struct radeon_device *rdev)
4428 {
4429 	int r;
4430 	u32 *hpd;
4431 
4432 	/*
4433 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4434 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4435 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4436 	 * be handled by KFD
4437 	 */
4438 	rdev->mec.num_mec = 1;
4439 	rdev->mec.num_pipe = 1;
4440 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4441 
4442 	if (rdev->mec.hpd_eop_obj == NULL) {
4443 		r = radeon_bo_create(rdev,
4444 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4445 				     PAGE_SIZE, true,
4446 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4447 				     &rdev->mec.hpd_eop_obj);
4448 		if (r) {
4449 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4450 			return r;
4451 		}
4452 	}
4453 
4454 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4455 	if (unlikely(r != 0)) {
4456 		cik_mec_fini(rdev);
4457 		return r;
4458 	}
4459 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4460 			  &rdev->mec.hpd_eop_gpu_addr);
4461 	if (r) {
4462 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4463 		cik_mec_fini(rdev);
4464 		return r;
4465 	}
4466 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4467 	if (r) {
4468 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4469 		cik_mec_fini(rdev);
4470 		return r;
4471 	}
4472 
4473 	/* clear memory.  Not sure if this is required or not */
4474 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4475 
4476 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4477 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4478 
4479 	return 0;
4480 }
4481 
/*
 * Per-queue register state kept inside the MQD (embedded as
 * bonaire_mqd.queue_state).  cik_cp_compute_resume() fills in these fields
 * and writes the same values to the correspondingly named CP_MQD_ and
 * CP_HQD_ registers of the selected queue.
 * NOTE(review): the field order presumably has to match what the CP
 * firmware expects in memory - do not reorder without confirming.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4520 
/*
 * In-memory queue descriptor (MQD) for a compute queue.
 * cik_cp_compute_resume() allocates one buffer object of
 * sizeof(struct bonaire_mqd) per compute ring, zeroes it, sets header and
 * the static_thread_mgmt words, and fills queue_state with the values it
 * programs into the queue's registers.  The remaining fields are left at
 * zero by the code in this file.
 * NOTE(review): the layout is presumably dictated by the hardware/firmware -
 * confirm before changing field order or sizes.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* register image of the queue, see struct hqd_registers */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4548 
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the per-pipe EOP buffers and the two compute queues
 * (CP1 and CP2 rings), then run a ring test on each queue.
 * A failed ring test only clears that ring's ready flag; it is not
 * reported through the return value.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	for (i = 0; i < rdev->mec.num_pipe; ++i) {
		/* the registers written below apply to the pipe selected here */
		cik_srbm_select(rdev, 0, i, 0, 0);

		/* each pipe owns a MEC_HPD_SIZE * 2 byte slice of the EOP buffer */
		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= order_base_2(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);

	}
	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* the MQD bo is created once and reused on later resumes */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		/*
		 * NOTE(review): the two error paths below call
		 * cik_cp_compute_fini() while this MQD bo is still reserved;
		 * confirm that the fini path does not try to reserve it again.
		 */
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/*
		 * Select this ring's me/pipe/queue; the selection is reset to
		 * 0/0/0/0 before the mutex is dropped at the end of the
		 * register programming below.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/*
			 * Poll for up to usec_timeout microseconds; a timeout
			 * is not treated as an error, programming continues.
			 */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		/* queue size goes in the low bits, rptr block size at bit 8 */
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* mark the ring ready for the test, and clear it again on failure */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4790 
/*
 * cik_cp_enable - forward @enable to cik_cp_gfx_enable() and then to
 * cik_cp_compute_enable().
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4796 
/*
 * cik_cp_load_microcode - load the gfx microcode, then the compute microcode.
 *
 * The compute load is skipped when the gfx load fails.
 * Returns 0 for success, or the error of the first load that failed.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
4810 
/*
 * cik_cp_fini - run cik_cp_gfx_fini() followed by cik_cp_compute_fini().
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4816 
4817 static int cik_cp_resume(struct radeon_device *rdev)
4818 {
4819 	int r;
4820 
4821 	cik_enable_gui_idle_interrupt(rdev, false);
4822 
4823 	r = cik_cp_load_microcode(rdev);
4824 	if (r)
4825 		return r;
4826 
4827 	r = cik_cp_gfx_resume(rdev);
4828 	if (r)
4829 		return r;
4830 	r = cik_cp_compute_resume(rdev);
4831 	if (r)
4832 		return r;
4833 
4834 	cik_enable_gui_idle_interrupt(rdev, true);
4835 
4836 	return 0;
4837 }
4838 
4839 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4840 {
4841 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4842 		RREG32(GRBM_STATUS));
4843 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4844 		RREG32(GRBM_STATUS2));
4845 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4846 		RREG32(GRBM_STATUS_SE0));
4847 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4848 		RREG32(GRBM_STATUS_SE1));
4849 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4850 		RREG32(GRBM_STATUS_SE2));
4851 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4852 		RREG32(GRBM_STATUS_SE3));
4853 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4854 		RREG32(SRBM_STATUS));
4855 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4856 		RREG32(SRBM_STATUS2));
4857 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4858 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4859 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4860 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4861 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4862 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4863 		 RREG32(CP_STALLED_STAT1));
4864 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4865 		 RREG32(CP_STALLED_STAT2));
4866 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4867 		 RREG32(CP_STALLED_STAT3));
4868 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4869 		 RREG32(CP_CPF_BUSY_STAT));
4870 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4871 		 RREG32(CP_CPF_STALLED_STAT1));
4872 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4873 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4874 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4875 		 RREG32(CP_CPC_STALLED_STAT1));
4876 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4877 }
4878 
4879 /**
4880  * cik_gpu_check_soft_reset - check which blocks are busy
4881  *
4882  * @rdev: radeon_device pointer
4883  *
4884  * Check which blocks are busy and return the relevant reset
4885  * mask to be used by cik_gpu_soft_reset().
4886  * Returns a mask of the blocks to be reset.
4887  */
4888 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4889 {
4890 	u32 reset_mask = 0;
4891 	u32 tmp;
4892 
4893 	/* GRBM_STATUS */
4894 	tmp = RREG32(GRBM_STATUS);
4895 	if (tmp & (PA_BUSY | SC_BUSY |
4896 		   BCI_BUSY | SX_BUSY |
4897 		   TA_BUSY | VGT_BUSY |
4898 		   DB_BUSY | CB_BUSY |
4899 		   GDS_BUSY | SPI_BUSY |
4900 		   IA_BUSY | IA_BUSY_NO_DMA))
4901 		reset_mask |= RADEON_RESET_GFX;
4902 
4903 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4904 		reset_mask |= RADEON_RESET_CP;
4905 
4906 	/* GRBM_STATUS2 */
4907 	tmp = RREG32(GRBM_STATUS2);
4908 	if (tmp & RLC_BUSY)
4909 		reset_mask |= RADEON_RESET_RLC;
4910 
4911 	/* SDMA0_STATUS_REG */
4912 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4913 	if (!(tmp & SDMA_IDLE))
4914 		reset_mask |= RADEON_RESET_DMA;
4915 
4916 	/* SDMA1_STATUS_REG */
4917 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4918 	if (!(tmp & SDMA_IDLE))
4919 		reset_mask |= RADEON_RESET_DMA1;
4920 
4921 	/* SRBM_STATUS2 */
4922 	tmp = RREG32(SRBM_STATUS2);
4923 	if (tmp & SDMA_BUSY)
4924 		reset_mask |= RADEON_RESET_DMA;
4925 
4926 	if (tmp & SDMA1_BUSY)
4927 		reset_mask |= RADEON_RESET_DMA1;
4928 
4929 	/* SRBM_STATUS */
4930 	tmp = RREG32(SRBM_STATUS);
4931 
4932 	if (tmp & IH_BUSY)
4933 		reset_mask |= RADEON_RESET_IH;
4934 
4935 	if (tmp & SEM_BUSY)
4936 		reset_mask |= RADEON_RESET_SEM;
4937 
4938 	if (tmp & GRBM_RQ_PENDING)
4939 		reset_mask |= RADEON_RESET_GRBM;
4940 
4941 	if (tmp & VMC_BUSY)
4942 		reset_mask |= RADEON_RESET_VMC;
4943 
4944 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4945 		   MCC_BUSY | MCD_BUSY))
4946 		reset_mask |= RADEON_RESET_MC;
4947 
4948 	if (evergreen_is_display_hung(rdev))
4949 		reset_mask |= RADEON_RESET_DISPLAY;
4950 
4951 	/* Skip MC reset as it's mostly likely not hung, just busy */
4952 	if (reset_mask & RADEON_RESET_MC) {
4953 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4954 		reset_mask &= ~RADEON_RESET_MC;
4955 	}
4956 
4957 	return reset_mask;
4958 }
4959 
4960 /**
4961  * cik_gpu_soft_reset - soft reset GPU
4962  *
4963  * @rdev: radeon_device pointer
4964  * @reset_mask: mask of which blocks to reset
4965  *
4966  * Soft reset the blocks specified in @reset_mask.
4967  */
4968 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4969 {
4970 	struct evergreen_mc_save save;
4971 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4972 	u32 tmp;
4973 
4974 	if (reset_mask == 0)
4975 		return;
4976 
4977 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4978 
4979 	cik_print_gpu_status_regs(rdev);
4980 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4981 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4982 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4983 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4984 
4985 	/* disable CG/PG */
4986 	cik_fini_pg(rdev);
4987 	cik_fini_cg(rdev);
4988 
4989 	/* stop the rlc */
4990 	cik_rlc_stop(rdev);
4991 
4992 	/* Disable GFX parsing/prefetching */
4993 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4994 
4995 	/* Disable MEC parsing/prefetching */
4996 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4997 
4998 	if (reset_mask & RADEON_RESET_DMA) {
4999 		/* sdma0 */
5000 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5001 		tmp |= SDMA_HALT;
5002 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5003 	}
5004 	if (reset_mask & RADEON_RESET_DMA1) {
5005 		/* sdma1 */
5006 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5007 		tmp |= SDMA_HALT;
5008 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5009 	}
5010 
5011 	evergreen_mc_stop(rdev, &save);
5012 	if (evergreen_mc_wait_for_idle(rdev)) {
5013 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5014 	}
5015 
5016 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5017 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5018 
5019 	if (reset_mask & RADEON_RESET_CP) {
5020 		grbm_soft_reset |= SOFT_RESET_CP;
5021 
5022 		srbm_soft_reset |= SOFT_RESET_GRBM;
5023 	}
5024 
5025 	if (reset_mask & RADEON_RESET_DMA)
5026 		srbm_soft_reset |= SOFT_RESET_SDMA;
5027 
5028 	if (reset_mask & RADEON_RESET_DMA1)
5029 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5030 
5031 	if (reset_mask & RADEON_RESET_DISPLAY)
5032 		srbm_soft_reset |= SOFT_RESET_DC;
5033 
5034 	if (reset_mask & RADEON_RESET_RLC)
5035 		grbm_soft_reset |= SOFT_RESET_RLC;
5036 
5037 	if (reset_mask & RADEON_RESET_SEM)
5038 		srbm_soft_reset |= SOFT_RESET_SEM;
5039 
5040 	if (reset_mask & RADEON_RESET_IH)
5041 		srbm_soft_reset |= SOFT_RESET_IH;
5042 
5043 	if (reset_mask & RADEON_RESET_GRBM)
5044 		srbm_soft_reset |= SOFT_RESET_GRBM;
5045 
5046 	if (reset_mask & RADEON_RESET_VMC)
5047 		srbm_soft_reset |= SOFT_RESET_VMC;
5048 
5049 	if (!(rdev->flags & RADEON_IS_IGP)) {
5050 		if (reset_mask & RADEON_RESET_MC)
5051 			srbm_soft_reset |= SOFT_RESET_MC;
5052 	}
5053 
5054 	if (grbm_soft_reset) {
5055 		tmp = RREG32(GRBM_SOFT_RESET);
5056 		tmp |= grbm_soft_reset;
5057 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5058 		WREG32(GRBM_SOFT_RESET, tmp);
5059 		tmp = RREG32(GRBM_SOFT_RESET);
5060 
5061 		udelay(50);
5062 
5063 		tmp &= ~grbm_soft_reset;
5064 		WREG32(GRBM_SOFT_RESET, tmp);
5065 		tmp = RREG32(GRBM_SOFT_RESET);
5066 	}
5067 
5068 	if (srbm_soft_reset) {
5069 		tmp = RREG32(SRBM_SOFT_RESET);
5070 		tmp |= srbm_soft_reset;
5071 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5072 		WREG32(SRBM_SOFT_RESET, tmp);
5073 		tmp = RREG32(SRBM_SOFT_RESET);
5074 
5075 		udelay(50);
5076 
5077 		tmp &= ~srbm_soft_reset;
5078 		WREG32(SRBM_SOFT_RESET, tmp);
5079 		tmp = RREG32(SRBM_SOFT_RESET);
5080 	}
5081 
5082 	/* Wait a little for things to settle down */
5083 	udelay(50);
5084 
5085 	evergreen_mc_resume(rdev, &save);
5086 	udelay(50);
5087 
5088 	cik_print_gpu_status_regs(rdev);
5089 }
5090 
/*
 * GMCON register values captured by kv_save_regs_for_reset() before a pci
 * config reset and written back by kv_restore_regs_for_reset() afterwards.
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5096 
5097 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5098 				   struct kv_reset_save_regs *save)
5099 {
5100 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5101 	save->gmcon_misc = RREG32(GMCON_MISC);
5102 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5103 
5104 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5105 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5106 						STCTRL_STUTTER_EN));
5107 }
5108 
5109 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5110 				      struct kv_reset_save_regs *save)
5111 {
5112 	int i;
5113 
5114 	WREG32(GMCON_PGFSM_WRITE, 0);
5115 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5116 
5117 	for (i = 0; i < 5; i++)
5118 		WREG32(GMCON_PGFSM_WRITE, 0);
5119 
5120 	WREG32(GMCON_PGFSM_WRITE, 0);
5121 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5122 
5123 	for (i = 0; i < 5; i++)
5124 		WREG32(GMCON_PGFSM_WRITE, 0);
5125 
5126 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5127 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5128 
5129 	for (i = 0; i < 5; i++)
5130 		WREG32(GMCON_PGFSM_WRITE, 0);
5131 
5132 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5133 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5134 
5135 	for (i = 0; i < 5; i++)
5136 		WREG32(GMCON_PGFSM_WRITE, 0);
5137 
5138 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5139 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5140 
5141 	for (i = 0; i < 5; i++)
5142 		WREG32(GMCON_PGFSM_WRITE, 0);
5143 
5144 	WREG32(GMCON_PGFSM_WRITE, 0);
5145 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5146 
5147 	for (i = 0; i < 5; i++)
5148 		WREG32(GMCON_PGFSM_WRITE, 0);
5149 
5150 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5151 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5152 
5153 	for (i = 0; i < 5; i++)
5154 		WREG32(GMCON_PGFSM_WRITE, 0);
5155 
5156 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5157 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5158 
5159 	for (i = 0; i < 5; i++)
5160 		WREG32(GMCON_PGFSM_WRITE, 0);
5161 
5162 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5163 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5164 
5165 	for (i = 0; i < 5; i++)
5166 		WREG32(GMCON_PGFSM_WRITE, 0);
5167 
5168 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5169 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5170 
5171 	for (i = 0; i < 5; i++)
5172 		WREG32(GMCON_PGFSM_WRITE, 0);
5173 
5174 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5175 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5176 
5177 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5178 	WREG32(GMCON_MISC, save->gmcon_misc);
5179 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5180 }
5181 
5182 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5183 {
5184 	struct evergreen_mc_save save;
5185 	struct kv_reset_save_regs kv_save = { 0 };
5186 	u32 tmp, i;
5187 
5188 	dev_info(rdev->dev, "GPU pci config reset\n");
5189 
5190 	/* disable dpm? */
5191 
5192 	/* disable cg/pg */
5193 	cik_fini_pg(rdev);
5194 	cik_fini_cg(rdev);
5195 
5196 	/* Disable GFX parsing/prefetching */
5197 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5198 
5199 	/* Disable MEC parsing/prefetching */
5200 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5201 
5202 	/* sdma0 */
5203 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5204 	tmp |= SDMA_HALT;
5205 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5206 	/* sdma1 */
5207 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5208 	tmp |= SDMA_HALT;
5209 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5210 	/* XXX other engines? */
5211 
5212 	/* halt the rlc, disable cp internal ints */
5213 	cik_rlc_stop(rdev);
5214 
5215 	udelay(50);
5216 
5217 	/* disable mem access */
5218 	evergreen_mc_stop(rdev, &save);
5219 	if (evergreen_mc_wait_for_idle(rdev)) {
5220 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5221 	}
5222 
5223 	if (rdev->flags & RADEON_IS_IGP)
5224 		kv_save_regs_for_reset(rdev, &kv_save);
5225 
5226 	/* disable BM */
5227 	pci_clear_master(rdev->pdev);
5228 	/* reset */
5229 	radeon_pci_config_reset(rdev);
5230 
5231 	udelay(100);
5232 
5233 	/* wait for asic to come out of reset */
5234 	for (i = 0; i < rdev->usec_timeout; i++) {
5235 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5236 			break;
5237 		udelay(1);
5238 	}
5239 
5240 	/* does asic init need to be run first??? */
5241 	if (rdev->flags & RADEON_IS_IGP)
5242 		kv_restore_regs_for_reset(rdev, &kv_save);
5243 }
5244 
5245 /**
5246  * cik_asic_reset - soft reset GPU
5247  *
5248  * @rdev: radeon_device pointer
5249  * @hard: force hard reset
5250  *
5251  * Look up which blocks are hung and attempt
5252  * to reset them.
5253  * Returns 0 for success.
5254  */
5255 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5256 {
5257 	u32 reset_mask;
5258 
5259 	if (hard) {
5260 		cik_gpu_pci_config_reset(rdev);
5261 		return 0;
5262 	}
5263 
5264 	reset_mask = cik_gpu_check_soft_reset(rdev);
5265 
5266 	if (reset_mask)
5267 		r600_set_bios_scratch_engine_hung(rdev, true);
5268 
5269 	/* try soft reset */
5270 	cik_gpu_soft_reset(rdev, reset_mask);
5271 
5272 	reset_mask = cik_gpu_check_soft_reset(rdev);
5273 
5274 	/* try pci config reset */
5275 	if (reset_mask && radeon_hard_reset)
5276 		cik_gpu_pci_config_reset(rdev);
5277 
5278 	reset_mask = cik_gpu_check_soft_reset(rdev);
5279 
5280 	if (!reset_mask)
5281 		r600_set_bios_scratch_engine_hung(rdev, false);
5282 
5283 	return 0;
5284 }
5285 
5286 /**
5287  * cik_gfx_is_lockup - check if the 3D engine is locked up
5288  *
5289  * @rdev: radeon_device pointer
5290  * @ring: radeon_ring structure holding ring information
5291  *
5292  * Check if the 3D engine is locked up (CIK).
5293  * Returns true if the engine is locked, false if not.
5294  */
5295 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5296 {
5297 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5298 
5299 	if (!(reset_mask & (RADEON_RESET_GFX |
5300 			    RADEON_RESET_COMPUTE |
5301 			    RADEON_RESET_CP))) {
5302 		radeon_ring_lockup_update(rdev, ring);
5303 		return false;
5304 	}
5305 	return radeon_ring_test_lockup(rdev, ring);
5306 }
5307 
5308 /* MC */
5309 /**
5310  * cik_mc_program - program the GPU memory controller
5311  *
5312  * @rdev: radeon_device pointer
5313  *
5314  * Set the location of vram, gart, and AGP in the GPU's
5315  * physical address space (CIK).
5316  */
5317 static void cik_mc_program(struct radeon_device *rdev)
5318 {
5319 	struct evergreen_mc_save save;
5320 	u32 tmp;
5321 	int i, j;
5322 
5323 	/* Initialize HDP */
5324 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5325 		WREG32((0x2c14 + j), 0x00000000);
5326 		WREG32((0x2c18 + j), 0x00000000);
5327 		WREG32((0x2c1c + j), 0x00000000);
5328 		WREG32((0x2c20 + j), 0x00000000);
5329 		WREG32((0x2c24 + j), 0x00000000);
5330 	}
5331 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5332 
5333 	evergreen_mc_stop(rdev, &save);
5334 	if (radeon_mc_wait_for_idle(rdev)) {
5335 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5336 	}
5337 	/* Lockout access through VGA aperture*/
5338 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5339 	/* Update configuration */
5340 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5341 	       rdev->mc.vram_start >> 12);
5342 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5343 	       rdev->mc.vram_end >> 12);
5344 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5345 	       rdev->vram_scratch.gpu_addr >> 12);
5346 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5347 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5348 	WREG32(MC_VM_FB_LOCATION, tmp);
5349 	/* XXX double check these! */
5350 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5351 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5352 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5353 	WREG32(MC_VM_AGP_BASE, 0);
5354 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5355 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5356 	if (radeon_mc_wait_for_idle(rdev)) {
5357 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5358 	}
5359 	evergreen_mc_resume(rdev, &save);
5360 	/* we need to own VRAM, so turn off the VGA renderer here
5361 	 * to stop it overwriting our objects */
5362 	rv515_vga_render_disable(rdev);
5363 }
5364 
5365 /**
5366  * cik_mc_init - initialize the memory controller driver params
5367  *
5368  * @rdev: radeon_device pointer
5369  *
5370  * Look up the amount of vram, vram width, and decide how to place
5371  * vram and gart within the GPU's physical address space (CIK).
5372  * Returns 0 for success.
5373  */
5374 static int cik_mc_init(struct radeon_device *rdev)
5375 {
5376 	u32 tmp;
5377 	int chansize, numchan;
5378 
5379 	/* Get VRAM informations */
5380 	rdev->mc.vram_is_ddr = true;
5381 	tmp = RREG32(MC_ARB_RAMCFG);
5382 	if (tmp & CHANSIZE_MASK) {
5383 		chansize = 64;
5384 	} else {
5385 		chansize = 32;
5386 	}
5387 	tmp = RREG32(MC_SHARED_CHMAP);
5388 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5389 	case 0:
5390 	default:
5391 		numchan = 1;
5392 		break;
5393 	case 1:
5394 		numchan = 2;
5395 		break;
5396 	case 2:
5397 		numchan = 4;
5398 		break;
5399 	case 3:
5400 		numchan = 8;
5401 		break;
5402 	case 4:
5403 		numchan = 3;
5404 		break;
5405 	case 5:
5406 		numchan = 6;
5407 		break;
5408 	case 6:
5409 		numchan = 10;
5410 		break;
5411 	case 7:
5412 		numchan = 12;
5413 		break;
5414 	case 8:
5415 		numchan = 16;
5416 		break;
5417 	}
5418 	rdev->mc.vram_width = numchan * chansize;
5419 	/* Could aper size report 0 ? */
5420 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5421 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5422 	/* size in MB on si */
5423 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5424 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5425 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5426 	si_vram_gtt_location(rdev, &rdev->mc);
5427 	radeon_update_bandwidth_info(rdev);
5428 
5429 	return 0;
5430 }
5431 
5432 /*
5433  * GART
5434  * VMID 0 is the physical GPU addresses as used by the kernel.
5435  * VMIDs 1-15 are used for userspace clients and are handled
5436  * by the radeon vm/hsa code.
5437  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache, then request a TLB invalidate for the
 * VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; 0x1 selects context 0 only */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5453 
5454 /**
5455  * cik_pcie_gart_enable - gart enable
5456  *
5457  * @rdev: radeon_device pointer
5458  *
5459  * This sets up the TLBs, programs the page tables for VMID0,
5460  * sets up the hw for VMIDs 1-15 which are allocated on
5461  * demand, and sets up the global locations for the LDS, GDS,
5462  * and GPUVM for FSA64 clients (CIK).
5463  * Returns 0 for success, errors for failure.
5464  */
5465 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5466 {
5467 	int r, i;
5468 
5469 	if (rdev->gart.robj == NULL) {
5470 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5471 		return -EINVAL;
5472 	}
5473 	r = radeon_gart_table_vram_pin(rdev);
5474 	if (r)
5475 		return r;
5476 	/* Setup TLB control */
5477 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5478 	       (0xA << 7) |
5479 	       ENABLE_L1_TLB |
5480 	       ENABLE_L1_FRAGMENT_PROCESSING |
5481 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5482 	       ENABLE_ADVANCED_DRIVER_MODEL |
5483 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5484 	/* Setup L2 cache */
5485 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5486 	       ENABLE_L2_FRAGMENT_PROCESSING |
5487 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5488 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5489 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5490 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5491 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5492 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5493 	       BANK_SELECT(4) |
5494 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5495 	/* setup context0 */
5496 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5497 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5498 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5499 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5500 			(u32)(rdev->dummy_page.addr >> 12));
5501 	WREG32(VM_CONTEXT0_CNTL2, 0);
5502 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5503 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5504 
5505 	WREG32(0x15D4, 0);
5506 	WREG32(0x15D8, 0);
5507 	WREG32(0x15DC, 0);
5508 
5509 	/* restore context1-15 */
5510 	/* set vm size, must be a multiple of 4 */
5511 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5512 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5513 	for (i = 1; i < 16; i++) {
5514 		if (i < 8)
5515 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5516 			       rdev->vm_manager.saved_table_addr[i]);
5517 		else
5518 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5519 			       rdev->vm_manager.saved_table_addr[i]);
5520 	}
5521 
5522 	/* enable context1-15 */
5523 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5524 	       (u32)(rdev->dummy_page.addr >> 12));
5525 	WREG32(VM_CONTEXT1_CNTL2, 4);
5526 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5527 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5528 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5529 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5530 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5531 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5532 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5533 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5534 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5535 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5536 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5537 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5538 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5539 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5540 
5541 	if (rdev->family == CHIP_KAVERI) {
5542 		u32 tmp = RREG32(CHUB_CONTROL);
5543 		tmp &= ~BYPASS_VM;
5544 		WREG32(CHUB_CONTROL, tmp);
5545 	}
5546 
5547 	/* XXX SH_MEM regs */
5548 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5549 	mutex_lock(&rdev->srbm_mutex);
5550 	for (i = 0; i < 16; i++) {
5551 		cik_srbm_select(rdev, 0, 0, 0, i);
5552 		/* CP and shaders */
5553 		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5554 		WREG32(SH_MEM_APE1_BASE, 1);
5555 		WREG32(SH_MEM_APE1_LIMIT, 0);
5556 		WREG32(SH_MEM_BASES, 0);
5557 		/* SDMA GFX */
5558 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5559 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5560 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5561 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5562 		/* XXX SDMA RLC - todo */
5563 	}
5564 	cik_srbm_select(rdev, 0, 0, 0, 0);
5565 	mutex_unlock(&rdev->srbm_mutex);
5566 
5567 	cik_pcie_gart_tlb_flush(rdev);
5568 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5569 		 (unsigned)(rdev->mc.gtt_size >> 20),
5570 		 (unsigned long long)rdev->gart.table_addr);
5571 	rdev->gart.ready = true;
5572 	return 0;
5573 }
5574 
5575 /**
5576  * cik_pcie_gart_disable - gart disable
5577  *
5578  * @rdev: radeon_device pointer
5579  *
5580  * This disables all VM page table (CIK).
5581  */
5582 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5583 {
5584 	unsigned i;
5585 
5586 	for (i = 1; i < 16; ++i) {
5587 		uint32_t reg;
5588 		if (i < 8)
5589 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5590 		else
5591 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5592 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5593 	}
5594 
5595 	/* Disable all tables */
5596 	WREG32(VM_CONTEXT0_CNTL, 0);
5597 	WREG32(VM_CONTEXT1_CNTL, 0);
5598 	/* Setup TLB control */
5599 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5600 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5601 	/* Setup L2 cache */
5602 	WREG32(VM_L2_CNTL,
5603 	       ENABLE_L2_FRAGMENT_PROCESSING |
5604 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5605 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5606 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5607 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5608 	WREG32(VM_L2_CNTL2, 0);
5609 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5610 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5611 	radeon_gart_table_vram_unpin(rdev);
5612 }
5613 
/**
 * cik_pcie_gart_fini - GART teardown callback
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, then releases the VRAM page table and
 * the common GART state, in that order (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* hardware first, so nothing references the table being freed */
	cik_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5627 
5628 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer pointer (unused)
 *
 * IB checking is done by the hardware on CIK, so there is nothing to
 * parse here.
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* intentionally a nop */
	return 0;
}
5641 
5642 /*
5643  * vm
5644  * VMID 0 is the physical GPU addresses as used by the kernel.
5645  * VMIDs 1-15 are used for userspace clients and are handled
5646  * by the radeon vm/hsa code.
5647  */
5648 /**
5649  * cik_vm_init - cik vm init callback
5650  *
5651  * @rdev: radeon_device pointer
5652  *
5653  * Inits cik specific vm parameters (number of VMs, base of vram for
5654  * VMIDs 1-15) (CIK).
5655  * Returns 0 for success.
5656  */
5657 int cik_vm_init(struct radeon_device *rdev)
5658 {
5659 	/*
5660 	 * number of VMs
5661 	 * VMID 0 is reserved for System
5662 	 * radeon graphics/compute will use VMIDs 1-15
5663 	 */
5664 	rdev->vm_manager.nvm = 16;
5665 	/* base offset of vram pages */
5666 	if (rdev->flags & RADEON_IS_IGP) {
5667 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5668 		tmp <<= 22;
5669 		rdev->vm_manager.vram_base_offset = tmp;
5670 	} else
5671 		rdev->vm_manager.vram_base_offset = 0;
5672 
5673 	return 0;
5674 }
5675 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer (unused)
 *
 * Counterpart of cik_vm_init(); there is no asic specific VM state
 * to tear down, so this is a nop (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing to do */
}
5686 
5687 /**
5688  * cik_vm_decode_fault - print human readable fault info
5689  *
5690  * @rdev: radeon_device pointer
5691  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5692  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5693  *
5694  * Print human readable fault information (CIK).
5695  */
5696 static void cik_vm_decode_fault(struct radeon_device *rdev,
5697 				u32 status, u32 addr, u32 mc_client)
5698 {
5699 	u32 mc_id;
5700 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5701 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5702 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5703 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5704 
5705 	if (rdev->family == CHIP_HAWAII)
5706 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5707 	else
5708 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5709 
5710 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5711 	       protections, vmid, addr,
5712 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5713 	       block, mc_client, mc_id);
5714 }
5715 
5716 /**
5717  * cik_vm_flush - cik vm flush using the CP
5718  *
5719  * @rdev: radeon_device pointer
5720  *
5721  * Update the page table base and flush the VM TLB
5722  * using the CP (CIK).
5723  */
5724 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5725 		  unsigned vm_id, uint64_t pd_addr)
5726 {
5727 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5728 
5729 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5730 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5731 				 WRITE_DATA_DST_SEL(0)));
5732 	if (vm_id < 8) {
5733 		radeon_ring_write(ring,
5734 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5735 	} else {
5736 		radeon_ring_write(ring,
5737 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5738 	}
5739 	radeon_ring_write(ring, 0);
5740 	radeon_ring_write(ring, pd_addr >> 12);
5741 
5742 	/* update SH_MEM_* regs */
5743 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5744 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5745 				 WRITE_DATA_DST_SEL(0)));
5746 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5747 	radeon_ring_write(ring, 0);
5748 	radeon_ring_write(ring, VMID(vm_id));
5749 
5750 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5751 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5752 				 WRITE_DATA_DST_SEL(0)));
5753 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5754 	radeon_ring_write(ring, 0);
5755 
5756 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5757 	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5758 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5759 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5760 
5761 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5762 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5763 				 WRITE_DATA_DST_SEL(0)));
5764 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5765 	radeon_ring_write(ring, 0);
5766 	radeon_ring_write(ring, VMID(0));
5767 
5768 	/* HDP flush */
5769 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5770 
5771 	/* bits 0-15 are the VM contexts0-15 */
5772 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5773 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5774 				 WRITE_DATA_DST_SEL(0)));
5775 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5776 	radeon_ring_write(ring, 0);
5777 	radeon_ring_write(ring, 1 << vm_id);
5778 
5779 	/* wait for the invalidate to complete */
5780 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5781 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5782 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5783 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5784 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5785 	radeon_ring_write(ring, 0);
5786 	radeon_ring_write(ring, 0); /* ref */
5787 	radeon_ring_write(ring, 0); /* mask */
5788 	radeon_ring_write(ring, 0x20); /* poll interval */
5789 
5790 	/* compute doesn't have PFP */
5791 	if (usepfp) {
5792 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5793 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5794 		radeon_ring_write(ring, 0x0);
5795 	}
5796 }
5797 
5798 /*
5799  * RLC
5800  * The RLC is a multi-purpose microengine that handles a
5801  * variety of functions, the most important of which is
5802  * the interrupt controller.
5803  */
5804 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5805 					  bool enable)
5806 {
5807 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5808 
5809 	if (enable)
5810 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5811 	else
5812 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5813 	WREG32(CP_INT_CNTL_RING0, tmp);
5814 }
5815 
5816 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5817 {
5818 	u32 tmp;
5819 
5820 	tmp = RREG32(RLC_LB_CNTL);
5821 	if (enable)
5822 		tmp |= LOAD_BALANCE_ENABLE;
5823 	else
5824 		tmp &= ~LOAD_BALANCE_ENABLE;
5825 	WREG32(RLC_LB_CNTL, tmp);
5826 }
5827 
5828 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5829 {
5830 	u32 i, j, k;
5831 	u32 mask;
5832 
5833 	mutex_lock(&rdev->grbm_idx_mutex);
5834 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5835 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5836 			cik_select_se_sh(rdev, i, j);
5837 			for (k = 0; k < rdev->usec_timeout; k++) {
5838 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5839 					break;
5840 				udelay(1);
5841 			}
5842 		}
5843 	}
5844 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5845 	mutex_unlock(&rdev->grbm_idx_mutex);
5846 
5847 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5848 	for (k = 0; k < rdev->usec_timeout; k++) {
5849 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5850 			break;
5851 		udelay(1);
5852 	}
5853 }
5854 
5855 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5856 {
5857 	u32 tmp;
5858 
5859 	tmp = RREG32(RLC_CNTL);
5860 	if (tmp != rlc)
5861 		WREG32(RLC_CNTL, rlc);
5862 }
5863 
5864 static u32 cik_halt_rlc(struct radeon_device *rdev)
5865 {
5866 	u32 data, orig;
5867 
5868 	orig = data = RREG32(RLC_CNTL);
5869 
5870 	if (data & RLC_ENABLE) {
5871 		u32 i;
5872 
5873 		data &= ~RLC_ENABLE;
5874 		WREG32(RLC_CNTL, data);
5875 
5876 		for (i = 0; i < rdev->usec_timeout; i++) {
5877 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5878 				break;
5879 			udelay(1);
5880 		}
5881 
5882 		cik_wait_for_rlc_serdes(rdev);
5883 	}
5884 
5885 	return orig;
5886 }
5887 
5888 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5889 {
5890 	u32 tmp, i, mask;
5891 
5892 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5893 	WREG32(RLC_GPR_REG2, tmp);
5894 
5895 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5896 	for (i = 0; i < rdev->usec_timeout; i++) {
5897 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5898 			break;
5899 		udelay(1);
5900 	}
5901 
5902 	for (i = 0; i < rdev->usec_timeout; i++) {
5903 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5904 			break;
5905 		udelay(1);
5906 	}
5907 }
5908 
5909 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5910 {
5911 	u32 tmp;
5912 
5913 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5914 	WREG32(RLC_GPR_REG2, tmp);
5915 }
5916 
5917 /**
5918  * cik_rlc_stop - stop the RLC ME
5919  *
5920  * @rdev: radeon_device pointer
5921  *
5922  * Halt the RLC ME (MicroEngine) (CIK).
5923  */
5924 static void cik_rlc_stop(struct radeon_device *rdev)
5925 {
5926 	WREG32(RLC_CNTL, 0);
5927 
5928 	cik_enable_gui_idle_interrupt(rdev, false);
5929 
5930 	cik_wait_for_rlc_serdes(rdev);
5931 }
5932 
5933 /**
5934  * cik_rlc_start - start the RLC ME
5935  *
5936  * @rdev: radeon_device pointer
5937  *
5938  * Unhalt the RLC ME (MicroEngine) (CIK).
5939  */
5940 static void cik_rlc_start(struct radeon_device *rdev)
5941 {
5942 	WREG32(RLC_CNTL, RLC_ENABLE);
5943 
5944 	cik_enable_gui_idle_interrupt(rdev, true);
5945 
5946 	udelay(50);
5947 }
5948 
5949 /**
5950  * cik_rlc_resume - setup the RLC hw
5951  *
5952  * @rdev: radeon_device pointer
5953  *
5954  * Initialize the RLC registers, load the ucode,
5955  * and start the RLC (CIK).
5956  * Returns 0 for success, -EINVAL if the ucode is not available.
5957  */
5958 static int cik_rlc_resume(struct radeon_device *rdev)
5959 {
5960 	u32 i, size, tmp;
5961 
5962 	if (!rdev->rlc_fw)
5963 		return -EINVAL;
5964 
5965 	cik_rlc_stop(rdev);
5966 
5967 	/* disable CG */
5968 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5969 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5970 
5971 	si_rlc_reset(rdev);
5972 
5973 	cik_init_pg(rdev);
5974 
5975 	cik_init_cg(rdev);
5976 
5977 	WREG32(RLC_LB_CNTR_INIT, 0);
5978 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5979 
5980 	mutex_lock(&rdev->grbm_idx_mutex);
5981 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5982 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5983 	WREG32(RLC_LB_PARAMS, 0x00600408);
5984 	WREG32(RLC_LB_CNTL, 0x80000004);
5985 	mutex_unlock(&rdev->grbm_idx_mutex);
5986 
5987 	WREG32(RLC_MC_CNTL, 0);
5988 	WREG32(RLC_UCODE_CNTL, 0);
5989 
5990 	if (rdev->new_fw) {
5991 		const struct rlc_firmware_header_v1_0 *hdr =
5992 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5993 		const __le32 *fw_data = (const __le32 *)
5994 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5995 
5996 		radeon_ucode_print_rlc_hdr(&hdr->header);
5997 
5998 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5999 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6000 		for (i = 0; i < size; i++)
6001 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6002 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6003 	} else {
6004 		const __be32 *fw_data;
6005 
6006 		switch (rdev->family) {
6007 		case CHIP_BONAIRE:
6008 		case CHIP_HAWAII:
6009 		default:
6010 			size = BONAIRE_RLC_UCODE_SIZE;
6011 			break;
6012 		case CHIP_KAVERI:
6013 			size = KV_RLC_UCODE_SIZE;
6014 			break;
6015 		case CHIP_KABINI:
6016 			size = KB_RLC_UCODE_SIZE;
6017 			break;
6018 		case CHIP_MULLINS:
6019 			size = ML_RLC_UCODE_SIZE;
6020 			break;
6021 		}
6022 
6023 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6024 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6025 		for (i = 0; i < size; i++)
6026 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6027 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6028 	}
6029 
6030 	/* XXX - find out what chips support lbpw */
6031 	cik_enable_lbpw(rdev, false);
6032 
6033 	if (rdev->family == CHIP_BONAIRE)
6034 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6035 
6036 	cik_rlc_start(rdev);
6037 
6038 	return 0;
6039 }
6040 
6041 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6042 {
6043 	u32 data, orig, tmp, tmp2;
6044 
6045 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6046 
6047 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6048 		cik_enable_gui_idle_interrupt(rdev, true);
6049 
6050 		tmp = cik_halt_rlc(rdev);
6051 
6052 		mutex_lock(&rdev->grbm_idx_mutex);
6053 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6054 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6055 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6056 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6057 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6058 		mutex_unlock(&rdev->grbm_idx_mutex);
6059 
6060 		cik_update_rlc(rdev, tmp);
6061 
6062 		data |= CGCG_EN | CGLS_EN;
6063 	} else {
6064 		cik_enable_gui_idle_interrupt(rdev, false);
6065 
6066 		RREG32(CB_CGTT_SCLK_CTRL);
6067 		RREG32(CB_CGTT_SCLK_CTRL);
6068 		RREG32(CB_CGTT_SCLK_CTRL);
6069 		RREG32(CB_CGTT_SCLK_CTRL);
6070 
6071 		data &= ~(CGCG_EN | CGLS_EN);
6072 	}
6073 
6074 	if (orig != data)
6075 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6076 
6077 }
6078 
6079 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6080 {
6081 	u32 data, orig, tmp = 0;
6082 
6083 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6084 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6085 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6086 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6087 				data |= CP_MEM_LS_EN;
6088 				if (orig != data)
6089 					WREG32(CP_MEM_SLP_CNTL, data);
6090 			}
6091 		}
6092 
6093 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6094 		data |= 0x00000001;
6095 		data &= 0xfffffffd;
6096 		if (orig != data)
6097 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6098 
6099 		tmp = cik_halt_rlc(rdev);
6100 
6101 		mutex_lock(&rdev->grbm_idx_mutex);
6102 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6103 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6104 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6105 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6106 		WREG32(RLC_SERDES_WR_CTRL, data);
6107 		mutex_unlock(&rdev->grbm_idx_mutex);
6108 
6109 		cik_update_rlc(rdev, tmp);
6110 
6111 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6112 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6113 			data &= ~SM_MODE_MASK;
6114 			data |= SM_MODE(0x2);
6115 			data |= SM_MODE_ENABLE;
6116 			data &= ~CGTS_OVERRIDE;
6117 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6118 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6119 				data &= ~CGTS_LS_OVERRIDE;
6120 			data &= ~ON_MONITOR_ADD_MASK;
6121 			data |= ON_MONITOR_ADD_EN;
6122 			data |= ON_MONITOR_ADD(0x96);
6123 			if (orig != data)
6124 				WREG32(CGTS_SM_CTRL_REG, data);
6125 		}
6126 	} else {
6127 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6128 		data |= 0x00000003;
6129 		if (orig != data)
6130 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6131 
6132 		data = RREG32(RLC_MEM_SLP_CNTL);
6133 		if (data & RLC_MEM_LS_EN) {
6134 			data &= ~RLC_MEM_LS_EN;
6135 			WREG32(RLC_MEM_SLP_CNTL, data);
6136 		}
6137 
6138 		data = RREG32(CP_MEM_SLP_CNTL);
6139 		if (data & CP_MEM_LS_EN) {
6140 			data &= ~CP_MEM_LS_EN;
6141 			WREG32(CP_MEM_SLP_CNTL, data);
6142 		}
6143 
6144 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6145 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6146 		if (orig != data)
6147 			WREG32(CGTS_SM_CTRL_REG, data);
6148 
6149 		tmp = cik_halt_rlc(rdev);
6150 
6151 		mutex_lock(&rdev->grbm_idx_mutex);
6152 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6153 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6154 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6155 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6156 		WREG32(RLC_SERDES_WR_CTRL, data);
6157 		mutex_unlock(&rdev->grbm_idx_mutex);
6158 
6159 		cik_update_rlc(rdev, tmp);
6160 	}
6161 }
6162 
/*
 * Registers walked by cik_enable_mc_ls() and cik_enable_mc_mgcg(),
 * which toggle the MC_LS_ENABLE and MC_CG_ENABLE bits respectively
 * in each of them.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6175 
6176 static void cik_enable_mc_ls(struct radeon_device *rdev,
6177 			     bool enable)
6178 {
6179 	int i;
6180 	u32 orig, data;
6181 
6182 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6183 		orig = data = RREG32(mc_cg_registers[i]);
6184 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6185 			data |= MC_LS_ENABLE;
6186 		else
6187 			data &= ~MC_LS_ENABLE;
6188 		if (data != orig)
6189 			WREG32(mc_cg_registers[i], data);
6190 	}
6191 }
6192 
6193 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6194 			       bool enable)
6195 {
6196 	int i;
6197 	u32 orig, data;
6198 
6199 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6200 		orig = data = RREG32(mc_cg_registers[i]);
6201 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6202 			data |= MC_CG_ENABLE;
6203 		else
6204 			data &= ~MC_CG_ENABLE;
6205 		if (data != orig)
6206 			WREG32(mc_cg_registers[i], data);
6207 	}
6208 }
6209 
6210 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6211 				 bool enable)
6212 {
6213 	u32 orig, data;
6214 
6215 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6216 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6217 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6218 	} else {
6219 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6220 		data |= 0xff000000;
6221 		if (data != orig)
6222 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6223 
6224 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6225 		data |= 0xff000000;
6226 		if (data != orig)
6227 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6228 	}
6229 }
6230 
6231 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6232 				 bool enable)
6233 {
6234 	u32 orig, data;
6235 
6236 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6237 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6238 		data |= 0x100;
6239 		if (orig != data)
6240 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6241 
6242 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6243 		data |= 0x100;
6244 		if (orig != data)
6245 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6246 	} else {
6247 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6248 		data &= ~0x100;
6249 		if (orig != data)
6250 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6251 
6252 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6253 		data &= ~0x100;
6254 		if (orig != data)
6255 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6256 	}
6257 }
6258 
6259 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6260 				bool enable)
6261 {
6262 	u32 orig, data;
6263 
6264 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6265 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6266 		data = 0xfff;
6267 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6268 
6269 		orig = data = RREG32(UVD_CGC_CTRL);
6270 		data |= DCM;
6271 		if (orig != data)
6272 			WREG32(UVD_CGC_CTRL, data);
6273 	} else {
6274 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6275 		data &= ~0xfff;
6276 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6277 
6278 		orig = data = RREG32(UVD_CGC_CTRL);
6279 		data &= ~DCM;
6280 		if (orig != data)
6281 			WREG32(UVD_CGC_CTRL, data);
6282 	}
6283 }
6284 
6285 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6286 			       bool enable)
6287 {
6288 	u32 orig, data;
6289 
6290 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6291 
6292 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6293 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6294 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6295 	else
6296 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6297 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6298 
6299 	if (orig != data)
6300 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6301 }
6302 
6303 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6304 				bool enable)
6305 {
6306 	u32 orig, data;
6307 
6308 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6309 
6310 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6311 		data &= ~CLOCK_GATING_DIS;
6312 	else
6313 		data |= CLOCK_GATING_DIS;
6314 
6315 	if (orig != data)
6316 		WREG32(HDP_HOST_PATH_CNTL, data);
6317 }
6318 
6319 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6320 			      bool enable)
6321 {
6322 	u32 orig, data;
6323 
6324 	orig = data = RREG32(HDP_MEM_POWER_LS);
6325 
6326 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6327 		data |= HDP_LS_ENABLE;
6328 	else
6329 		data &= ~HDP_LS_ENABLE;
6330 
6331 	if (orig != data)
6332 		WREG32(HDP_MEM_POWER_LS, data);
6333 }
6334 
6335 void cik_update_cg(struct radeon_device *rdev,
6336 		   u32 block, bool enable)
6337 {
6338 
6339 	if (block & RADEON_CG_BLOCK_GFX) {
6340 		cik_enable_gui_idle_interrupt(rdev, false);
6341 		/* order matters! */
6342 		if (enable) {
6343 			cik_enable_mgcg(rdev, true);
6344 			cik_enable_cgcg(rdev, true);
6345 		} else {
6346 			cik_enable_cgcg(rdev, false);
6347 			cik_enable_mgcg(rdev, false);
6348 		}
6349 		cik_enable_gui_idle_interrupt(rdev, true);
6350 	}
6351 
6352 	if (block & RADEON_CG_BLOCK_MC) {
6353 		if (!(rdev->flags & RADEON_IS_IGP)) {
6354 			cik_enable_mc_mgcg(rdev, enable);
6355 			cik_enable_mc_ls(rdev, enable);
6356 		}
6357 	}
6358 
6359 	if (block & RADEON_CG_BLOCK_SDMA) {
6360 		cik_enable_sdma_mgcg(rdev, enable);
6361 		cik_enable_sdma_mgls(rdev, enable);
6362 	}
6363 
6364 	if (block & RADEON_CG_BLOCK_BIF) {
6365 		cik_enable_bif_mgls(rdev, enable);
6366 	}
6367 
6368 	if (block & RADEON_CG_BLOCK_UVD) {
6369 		if (rdev->has_uvd)
6370 			cik_enable_uvd_mgcg(rdev, enable);
6371 	}
6372 
6373 	if (block & RADEON_CG_BLOCK_HDP) {
6374 		cik_enable_hdp_mgcg(rdev, enable);
6375 		cik_enable_hdp_ls(rdev, enable);
6376 	}
6377 
6378 	if (block & RADEON_CG_BLOCK_VCE) {
6379 		vce_v2_0_enable_mgcg(rdev, enable);
6380 	}
6381 }
6382 
6383 static void cik_init_cg(struct radeon_device *rdev)
6384 {
6385 
6386 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6387 
6388 	if (rdev->has_uvd)
6389 		si_init_uvd_internal_cg(rdev);
6390 
6391 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6392 			     RADEON_CG_BLOCK_SDMA |
6393 			     RADEON_CG_BLOCK_BIF |
6394 			     RADEON_CG_BLOCK_UVD |
6395 			     RADEON_CG_BLOCK_HDP), true);
6396 }
6397 
6398 static void cik_fini_cg(struct radeon_device *rdev)
6399 {
6400 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6401 			     RADEON_CG_BLOCK_SDMA |
6402 			     RADEON_CG_BLOCK_BIF |
6403 			     RADEON_CG_BLOCK_UVD |
6404 			     RADEON_CG_BLOCK_HDP), false);
6405 
6406 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6407 }
6408 
6409 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6410 					  bool enable)
6411 {
6412 	u32 data, orig;
6413 
6414 	orig = data = RREG32(RLC_PG_CNTL);
6415 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6416 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6417 	else
6418 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6419 	if (orig != data)
6420 		WREG32(RLC_PG_CNTL, data);
6421 }
6422 
6423 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6424 					  bool enable)
6425 {
6426 	u32 data, orig;
6427 
6428 	orig = data = RREG32(RLC_PG_CNTL);
6429 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6430 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6431 	else
6432 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6433 	if (orig != data)
6434 		WREG32(RLC_PG_CNTL, data);
6435 }
6436 
6437 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6438 {
6439 	u32 data, orig;
6440 
6441 	orig = data = RREG32(RLC_PG_CNTL);
6442 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6443 		data &= ~DISABLE_CP_PG;
6444 	else
6445 		data |= DISABLE_CP_PG;
6446 	if (orig != data)
6447 		WREG32(RLC_PG_CNTL, data);
6448 }
6449 
6450 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6451 {
6452 	u32 data, orig;
6453 
6454 	orig = data = RREG32(RLC_PG_CNTL);
6455 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6456 		data &= ~DISABLE_GDS_PG;
6457 	else
6458 		data |= DISABLE_GDS_PG;
6459 	if (orig != data)
6460 		WREG32(RLC_PG_CNTL, data);
6461 }
6462 
6463 #define CP_ME_TABLE_SIZE    96
6464 #define CP_ME_TABLE_OFFSET  2048
6465 #define CP_MEC_TABLE_OFFSET 4096
6466 
6467 void cik_init_cp_pg_table(struct radeon_device *rdev)
6468 {
6469 	volatile u32 *dst_ptr;
6470 	int me, i, max_me = 4;
6471 	u32 bo_offset = 0;
6472 	u32 table_offset, table_size;
6473 
6474 	if (rdev->family == CHIP_KAVERI)
6475 		max_me = 5;
6476 
6477 	if (rdev->rlc.cp_table_ptr == NULL)
6478 		return;
6479 
6480 	/* write the cp table buffer */
6481 	dst_ptr = rdev->rlc.cp_table_ptr;
6482 	for (me = 0; me < max_me; me++) {
6483 		if (rdev->new_fw) {
6484 			const __le32 *fw_data;
6485 			const struct gfx_firmware_header_v1_0 *hdr;
6486 
6487 			if (me == 0) {
6488 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6489 				fw_data = (const __le32 *)
6490 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6491 				table_offset = le32_to_cpu(hdr->jt_offset);
6492 				table_size = le32_to_cpu(hdr->jt_size);
6493 			} else if (me == 1) {
6494 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6495 				fw_data = (const __le32 *)
6496 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6497 				table_offset = le32_to_cpu(hdr->jt_offset);
6498 				table_size = le32_to_cpu(hdr->jt_size);
6499 			} else if (me == 2) {
6500 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6501 				fw_data = (const __le32 *)
6502 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6503 				table_offset = le32_to_cpu(hdr->jt_offset);
6504 				table_size = le32_to_cpu(hdr->jt_size);
6505 			} else if (me == 3) {
6506 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6507 				fw_data = (const __le32 *)
6508 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6509 				table_offset = le32_to_cpu(hdr->jt_offset);
6510 				table_size = le32_to_cpu(hdr->jt_size);
6511 			} else {
6512 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6513 				fw_data = (const __le32 *)
6514 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6515 				table_offset = le32_to_cpu(hdr->jt_offset);
6516 				table_size = le32_to_cpu(hdr->jt_size);
6517 			}
6518 
6519 			for (i = 0; i < table_size; i ++) {
6520 				dst_ptr[bo_offset + i] =
6521 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6522 			}
6523 			bo_offset += table_size;
6524 		} else {
6525 			const __be32 *fw_data;
6526 			table_size = CP_ME_TABLE_SIZE;
6527 
6528 			if (me == 0) {
6529 				fw_data = (const __be32 *)rdev->ce_fw->data;
6530 				table_offset = CP_ME_TABLE_OFFSET;
6531 			} else if (me == 1) {
6532 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6533 				table_offset = CP_ME_TABLE_OFFSET;
6534 			} else if (me == 2) {
6535 				fw_data = (const __be32 *)rdev->me_fw->data;
6536 				table_offset = CP_ME_TABLE_OFFSET;
6537 			} else {
6538 				fw_data = (const __be32 *)rdev->mec_fw->data;
6539 				table_offset = CP_MEC_TABLE_OFFSET;
6540 			}
6541 
6542 			for (i = 0; i < table_size; i ++) {
6543 				dst_ptr[bo_offset + i] =
6544 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6545 			}
6546 			bo_offset += table_size;
6547 		}
6548 	}
6549 }
6550 
6551 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6552 				bool enable)
6553 {
6554 	u32 data, orig;
6555 
6556 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6557 		orig = data = RREG32(RLC_PG_CNTL);
6558 		data |= GFX_PG_ENABLE;
6559 		if (orig != data)
6560 			WREG32(RLC_PG_CNTL, data);
6561 
6562 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6563 		data |= AUTO_PG_EN;
6564 		if (orig != data)
6565 			WREG32(RLC_AUTO_PG_CTRL, data);
6566 	} else {
6567 		orig = data = RREG32(RLC_PG_CNTL);
6568 		data &= ~GFX_PG_ENABLE;
6569 		if (orig != data)
6570 			WREG32(RLC_PG_CNTL, data);
6571 
6572 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6573 		data &= ~AUTO_PG_EN;
6574 		if (orig != data)
6575 			WREG32(RLC_AUTO_PG_CTRL, data);
6576 
6577 		data = RREG32(DB_RENDER_CONTROL);
6578 	}
6579 }
6580 
6581 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6582 {
6583 	u32 mask = 0, tmp, tmp1;
6584 	int i;
6585 
6586 	mutex_lock(&rdev->grbm_idx_mutex);
6587 	cik_select_se_sh(rdev, se, sh);
6588 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6589 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6590 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6591 	mutex_unlock(&rdev->grbm_idx_mutex);
6592 
6593 	tmp &= 0xffff0000;
6594 
6595 	tmp |= tmp1;
6596 	tmp >>= 16;
6597 
6598 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6599 		mask <<= 1;
6600 		mask |= 1;
6601 	}
6602 
6603 	return (~tmp) & mask;
6604 }
6605 
6606 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6607 {
6608 	u32 i, j, k, active_cu_number = 0;
6609 	u32 mask, counter, cu_bitmap;
6610 	u32 tmp = 0;
6611 
6612 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6613 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6614 			mask = 1;
6615 			cu_bitmap = 0;
6616 			counter = 0;
6617 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6618 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6619 					if (counter < 2)
6620 						cu_bitmap |= mask;
6621 					counter ++;
6622 				}
6623 				mask <<= 1;
6624 			}
6625 
6626 			active_cu_number += counter;
6627 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6628 		}
6629 	}
6630 
6631 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6632 
6633 	tmp = RREG32(RLC_MAX_PG_CU);
6634 	tmp &= ~MAX_PU_CU_MASK;
6635 	tmp |= MAX_PU_CU(active_cu_number);
6636 	WREG32(RLC_MAX_PG_CU, tmp);
6637 }
6638 
6639 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6640 				       bool enable)
6641 {
6642 	u32 data, orig;
6643 
6644 	orig = data = RREG32(RLC_PG_CNTL);
6645 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6646 		data |= STATIC_PER_CU_PG_ENABLE;
6647 	else
6648 		data &= ~STATIC_PER_CU_PG_ENABLE;
6649 	if (orig != data)
6650 		WREG32(RLC_PG_CNTL, data);
6651 }
6652 
6653 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6654 					bool enable)
6655 {
6656 	u32 data, orig;
6657 
6658 	orig = data = RREG32(RLC_PG_CNTL);
6659 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6660 		data |= DYN_PER_CU_PG_ENABLE;
6661 	else
6662 		data &= ~DYN_PER_CU_PG_ENABLE;
6663 	if (orig != data)
6664 		WREG32(RLC_PG_CNTL, data);
6665 }
6666 
6667 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6668 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6669 
6670 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6671 {
6672 	u32 data, orig;
6673 	u32 i;
6674 
6675 	if (rdev->rlc.cs_data) {
6676 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6677 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6678 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6679 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6680 	} else {
6681 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6682 		for (i = 0; i < 3; i++)
6683 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6684 	}
6685 	if (rdev->rlc.reg_list) {
6686 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6687 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6688 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6689 	}
6690 
6691 	orig = data = RREG32(RLC_PG_CNTL);
6692 	data |= GFX_PG_SRC;
6693 	if (orig != data)
6694 		WREG32(RLC_PG_CNTL, data);
6695 
6696 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6697 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6698 
6699 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6700 	data &= ~IDLE_POLL_COUNT_MASK;
6701 	data |= IDLE_POLL_COUNT(0x60);
6702 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6703 
6704 	data = 0x10101010;
6705 	WREG32(RLC_PG_DELAY, data);
6706 
6707 	data = RREG32(RLC_PG_DELAY_2);
6708 	data &= ~0xff;
6709 	data |= 0x3;
6710 	WREG32(RLC_PG_DELAY_2, data);
6711 
6712 	data = RREG32(RLC_AUTO_PG_CTRL);
6713 	data &= ~GRBM_REG_SGIT_MASK;
6714 	data |= GRBM_REG_SGIT(0x700);
6715 	WREG32(RLC_AUTO_PG_CTRL, data);
6716 
6717 }
6718 
6719 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6720 {
6721 	cik_enable_gfx_cgpg(rdev, enable);
6722 	cik_enable_gfx_static_mgpg(rdev, enable);
6723 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6724 }
6725 
6726 u32 cik_get_csb_size(struct radeon_device *rdev)
6727 {
6728 	u32 count = 0;
6729 	const struct cs_section_def *sect = NULL;
6730 	const struct cs_extent_def *ext = NULL;
6731 
6732 	if (rdev->rlc.cs_data == NULL)
6733 		return 0;
6734 
6735 	/* begin clear state */
6736 	count += 2;
6737 	/* context control state */
6738 	count += 3;
6739 
6740 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6741 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6742 			if (sect->id == SECT_CONTEXT)
6743 				count += 2 + ext->reg_count;
6744 			else
6745 				return 0;
6746 		}
6747 	}
6748 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6749 	count += 4;
6750 	/* end clear state */
6751 	count += 2;
6752 	/* clear state */
6753 	count += 2;
6754 
6755 	return count;
6756 }
6757 
6758 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6759 {
6760 	u32 count = 0, i;
6761 	const struct cs_section_def *sect = NULL;
6762 	const struct cs_extent_def *ext = NULL;
6763 
6764 	if (rdev->rlc.cs_data == NULL)
6765 		return;
6766 	if (buffer == NULL)
6767 		return;
6768 
6769 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6770 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6771 
6772 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6773 	buffer[count++] = cpu_to_le32(0x80000000);
6774 	buffer[count++] = cpu_to_le32(0x80000000);
6775 
6776 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6777 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6778 			if (sect->id == SECT_CONTEXT) {
6779 				buffer[count++] =
6780 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6781 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6782 				for (i = 0; i < ext->reg_count; i++)
6783 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6784 			} else {
6785 				return;
6786 			}
6787 		}
6788 	}
6789 
6790 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6791 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6792 	switch (rdev->family) {
6793 	case CHIP_BONAIRE:
6794 		buffer[count++] = cpu_to_le32(0x16000012);
6795 		buffer[count++] = cpu_to_le32(0x00000000);
6796 		break;
6797 	case CHIP_KAVERI:
6798 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6799 		buffer[count++] = cpu_to_le32(0x00000000);
6800 		break;
6801 	case CHIP_KABINI:
6802 	case CHIP_MULLINS:
6803 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6804 		buffer[count++] = cpu_to_le32(0x00000000);
6805 		break;
6806 	case CHIP_HAWAII:
6807 		buffer[count++] = cpu_to_le32(0x3a00161a);
6808 		buffer[count++] = cpu_to_le32(0x0000002e);
6809 		break;
6810 	default:
6811 		buffer[count++] = cpu_to_le32(0x00000000);
6812 		buffer[count++] = cpu_to_le32(0x00000000);
6813 		break;
6814 	}
6815 
6816 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6817 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6818 
6819 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6820 	buffer[count++] = cpu_to_le32(0);
6821 }
6822 
6823 static void cik_init_pg(struct radeon_device *rdev)
6824 {
6825 	if (rdev->pg_flags) {
6826 		cik_enable_sck_slowdown_on_pu(rdev, true);
6827 		cik_enable_sck_slowdown_on_pd(rdev, true);
6828 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6829 			cik_init_gfx_cgpg(rdev);
6830 			cik_enable_cp_pg(rdev, true);
6831 			cik_enable_gds_pg(rdev, true);
6832 		}
6833 		cik_init_ao_cu_mask(rdev);
6834 		cik_update_gfx_pg(rdev, true);
6835 	}
6836 }
6837 
6838 static void cik_fini_pg(struct radeon_device *rdev)
6839 {
6840 	if (rdev->pg_flags) {
6841 		cik_update_gfx_pg(rdev, false);
6842 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6843 			cik_enable_cp_pg(rdev, false);
6844 			cik_enable_gds_pg(rdev, false);
6845 		}
6846 	}
6847 }
6848 
6849 /*
6850  * Interrupts
6851  * Starting with r6xx, interrupts are handled via a ring buffer.
6852  * Ring buffers are areas of GPU accessible memory that the GPU
6853  * writes interrupt vectors into and the host reads vectors out of.
6854  * There is a rptr (read pointer) that determines where the
6855  * host is currently reading, and a wptr (write pointer)
6856  * which determines where the GPU has written.  When the
6857  * pointers are equal, the ring is idle.  When the GPU
6858  * writes vectors to the ring buffer, it increments the
6859  * wptr.  When there is an interrupt, the host then starts
6860  * fetching commands and processing them until the pointers are
6861  * equal again at which point it updates the rptr.
6862  */
6863 
6864 /**
6865  * cik_enable_interrupts - Enable the interrupt ring buffer
6866  *
6867  * @rdev: radeon_device pointer
6868  *
6869  * Enable the interrupt ring buffer (CIK).
6870  */
6871 static void cik_enable_interrupts(struct radeon_device *rdev)
6872 {
6873 	u32 ih_cntl = RREG32(IH_CNTL);
6874 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6875 
6876 	ih_cntl |= ENABLE_INTR;
6877 	ih_rb_cntl |= IH_RB_ENABLE;
6878 	WREG32(IH_CNTL, ih_cntl);
6879 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6880 	rdev->ih.enabled = true;
6881 }
6882 
6883 /**
6884  * cik_disable_interrupts - Disable the interrupt ring buffer
6885  *
6886  * @rdev: radeon_device pointer
6887  *
6888  * Disable the interrupt ring buffer (CIK).
6889  */
6890 static void cik_disable_interrupts(struct radeon_device *rdev)
6891 {
6892 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6893 	u32 ih_cntl = RREG32(IH_CNTL);
6894 
6895 	ih_rb_cntl &= ~IH_RB_ENABLE;
6896 	ih_cntl &= ~ENABLE_INTR;
6897 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6898 	WREG32(IH_CNTL, ih_cntl);
6899 	/* set rptr, wptr to 0 */
6900 	WREG32(IH_RB_RPTR, 0);
6901 	WREG32(IH_RB_WPTR, 0);
6902 	rdev->ih.enabled = false;
6903 	rdev->ih.rptr = 0;
6904 }
6905 
6906 /**
6907  * cik_disable_interrupt_state - Disable all interrupt sources
6908  *
6909  * @rdev: radeon_device pointer
6910  *
6911  * Clear all interrupt enable bits used by the driver (CIK).
6912  */
6913 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6914 {
6915 	u32 tmp;
6916 
6917 	/* gfx ring */
6918 	tmp = RREG32(CP_INT_CNTL_RING0) &
6919 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6920 	WREG32(CP_INT_CNTL_RING0, tmp);
6921 	/* sdma */
6922 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6923 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6924 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6925 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6926 	/* compute queues */
6927 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6928 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6929 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6930 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6931 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6932 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6933 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6934 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6935 	/* grbm */
6936 	WREG32(GRBM_INT_CNTL, 0);
6937 	/* SRBM */
6938 	WREG32(SRBM_INT_CNTL, 0);
6939 	/* vline/vblank, etc. */
6940 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6941 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6942 	if (rdev->num_crtc >= 4) {
6943 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6944 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6945 	}
6946 	if (rdev->num_crtc >= 6) {
6947 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6948 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6949 	}
6950 	/* pflip */
6951 	if (rdev->num_crtc >= 2) {
6952 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6953 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6954 	}
6955 	if (rdev->num_crtc >= 4) {
6956 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6957 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6958 	}
6959 	if (rdev->num_crtc >= 6) {
6960 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6961 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6962 	}
6963 
6964 	/* dac hotplug */
6965 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6966 
6967 	/* digital hotplug */
6968 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6969 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6970 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6971 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6972 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6973 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6974 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6975 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6976 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6977 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6978 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6979 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6980 
6981 }
6982 
6983 /**
6984  * cik_irq_init - init and enable the interrupt ring
6985  *
6986  * @rdev: radeon_device pointer
6987  *
6988  * Allocate a ring buffer for the interrupt controller,
6989  * enable the RLC, disable interrupts, enable the IH
6990  * ring buffer and enable it (CIK).
6991  * Called at device load and reume.
6992  * Returns 0 for success, errors for failure.
6993  */
6994 static int cik_irq_init(struct radeon_device *rdev)
6995 {
6996 	int ret = 0;
6997 	int rb_bufsz;
6998 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6999 
7000 	/* allocate ring */
7001 	ret = r600_ih_ring_alloc(rdev);
7002 	if (ret)
7003 		return ret;
7004 
7005 	/* disable irqs */
7006 	cik_disable_interrupts(rdev);
7007 
7008 	/* init rlc */
7009 	ret = cik_rlc_resume(rdev);
7010 	if (ret) {
7011 		r600_ih_ring_fini(rdev);
7012 		return ret;
7013 	}
7014 
7015 	/* setup interrupt control */
7016 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7017 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7018 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7019 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7020 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7021 	 */
7022 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7023 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7024 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7025 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7026 
7027 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7028 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7029 
7030 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7031 		      IH_WPTR_OVERFLOW_CLEAR |
7032 		      (rb_bufsz << 1));
7033 
7034 	if (rdev->wb.enabled)
7035 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7036 
7037 	/* set the writeback address whether it's enabled or not */
7038 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7039 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7040 
7041 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7042 
7043 	/* set rptr, wptr to 0 */
7044 	WREG32(IH_RB_RPTR, 0);
7045 	WREG32(IH_RB_WPTR, 0);
7046 
7047 	/* Default settings for IH_CNTL (disabled at first) */
7048 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7049 	/* RPTR_REARM only works if msi's are enabled */
7050 	if (rdev->msi_enabled)
7051 		ih_cntl |= RPTR_REARM;
7052 	WREG32(IH_CNTL, ih_cntl);
7053 
7054 	/* force the active interrupt state to all disabled */
7055 	cik_disable_interrupt_state(rdev);
7056 
7057 	pci_set_master(rdev->pdev);
7058 
7059 	/* enable irqs */
7060 	cik_enable_interrupts(rdev);
7061 
7062 	return ret;
7063 }
7064 
7065 /**
7066  * cik_irq_set - enable/disable interrupt sources
7067  *
7068  * @rdev: radeon_device pointer
7069  *
7070  * Enable interrupt sources on the GPU (vblanks, hpd,
7071  * etc.) (CIK).
7072  * Returns 0 for success, errors for failure.
7073  */
7074 int cik_irq_set(struct radeon_device *rdev)
7075 {
7076 	u32 cp_int_cntl;
7077 	u32 cp_m1p0;
7078 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7079 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7080 	u32 grbm_int_cntl = 0;
7081 	u32 dma_cntl, dma_cntl1;
7082 
7083 	if (!rdev->irq.installed) {
7084 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7085 		return -EINVAL;
7086 	}
7087 	/* don't enable anything if the ih is disabled */
7088 	if (!rdev->ih.enabled) {
7089 		cik_disable_interrupts(rdev);
7090 		/* force the active interrupt state to all disabled */
7091 		cik_disable_interrupt_state(rdev);
7092 		return 0;
7093 	}
7094 
7095 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7096 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7097 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7098 
7099 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7100 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7101 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7102 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7103 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7104 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7105 
7106 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7107 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7108 
7109 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7110 
7111 	/* enable CP interrupts on all rings */
7112 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7113 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7114 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7115 	}
7116 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7117 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7118 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7119 		if (ring->me == 1) {
7120 			switch (ring->pipe) {
7121 			case 0:
7122 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7123 				break;
7124 			default:
7125 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7126 				break;
7127 			}
7128 		} else {
7129 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7130 		}
7131 	}
7132 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7133 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7134 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7135 		if (ring->me == 1) {
7136 			switch (ring->pipe) {
7137 			case 0:
7138 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7139 				break;
7140 			default:
7141 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7142 				break;
7143 			}
7144 		} else {
7145 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7146 		}
7147 	}
7148 
7149 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7150 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7151 		dma_cntl |= TRAP_ENABLE;
7152 	}
7153 
7154 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7155 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7156 		dma_cntl1 |= TRAP_ENABLE;
7157 	}
7158 
7159 	if (rdev->irq.crtc_vblank_int[0] ||
7160 	    atomic_read(&rdev->irq.pflip[0])) {
7161 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7162 		crtc1 |= VBLANK_INTERRUPT_MASK;
7163 	}
7164 	if (rdev->irq.crtc_vblank_int[1] ||
7165 	    atomic_read(&rdev->irq.pflip[1])) {
7166 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7167 		crtc2 |= VBLANK_INTERRUPT_MASK;
7168 	}
7169 	if (rdev->irq.crtc_vblank_int[2] ||
7170 	    atomic_read(&rdev->irq.pflip[2])) {
7171 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7172 		crtc3 |= VBLANK_INTERRUPT_MASK;
7173 	}
7174 	if (rdev->irq.crtc_vblank_int[3] ||
7175 	    atomic_read(&rdev->irq.pflip[3])) {
7176 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7177 		crtc4 |= VBLANK_INTERRUPT_MASK;
7178 	}
7179 	if (rdev->irq.crtc_vblank_int[4] ||
7180 	    atomic_read(&rdev->irq.pflip[4])) {
7181 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7182 		crtc5 |= VBLANK_INTERRUPT_MASK;
7183 	}
7184 	if (rdev->irq.crtc_vblank_int[5] ||
7185 	    atomic_read(&rdev->irq.pflip[5])) {
7186 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7187 		crtc6 |= VBLANK_INTERRUPT_MASK;
7188 	}
7189 	if (rdev->irq.hpd[0]) {
7190 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7191 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7192 	}
7193 	if (rdev->irq.hpd[1]) {
7194 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7195 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7196 	}
7197 	if (rdev->irq.hpd[2]) {
7198 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7199 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7200 	}
7201 	if (rdev->irq.hpd[3]) {
7202 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7203 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7204 	}
7205 	if (rdev->irq.hpd[4]) {
7206 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7207 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7208 	}
7209 	if (rdev->irq.hpd[5]) {
7210 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7211 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7212 	}
7213 
7214 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7215 
7216 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7217 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7218 
7219 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7220 
7221 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7222 
7223 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7224 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7225 	if (rdev->num_crtc >= 4) {
7226 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7227 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7228 	}
7229 	if (rdev->num_crtc >= 6) {
7230 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7231 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7232 	}
7233 
7234 	if (rdev->num_crtc >= 2) {
7235 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7236 		       GRPH_PFLIP_INT_MASK);
7237 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7238 		       GRPH_PFLIP_INT_MASK);
7239 	}
7240 	if (rdev->num_crtc >= 4) {
7241 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7242 		       GRPH_PFLIP_INT_MASK);
7243 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7244 		       GRPH_PFLIP_INT_MASK);
7245 	}
7246 	if (rdev->num_crtc >= 6) {
7247 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7248 		       GRPH_PFLIP_INT_MASK);
7249 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7250 		       GRPH_PFLIP_INT_MASK);
7251 	}
7252 
7253 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7254 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7255 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7256 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7257 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7258 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7259 
7260 	/* posting read */
7261 	RREG32(SRBM_STATUS);
7262 
7263 	return 0;
7264 }
7265 
7266 /**
7267  * cik_irq_ack - ack interrupt sources
7268  *
7269  * @rdev: radeon_device pointer
7270  *
7271  * Ack interrupt sources on the GPU (vblanks, hpd,
7272  * etc.) (CIK).  Certain interrupts sources are sw
7273  * generated and do not require an explicit ack.
7274  */
7275 static inline void cik_irq_ack(struct radeon_device *rdev)
7276 {
7277 	u32 tmp;
7278 
7279 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7280 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7281 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7282 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7283 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7284 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7285 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7286 
7287 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7288 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7289 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7290 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7291 	if (rdev->num_crtc >= 4) {
7292 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7293 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7294 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7295 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7296 	}
7297 	if (rdev->num_crtc >= 6) {
7298 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7299 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7300 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7301 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7302 	}
7303 
7304 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7305 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7306 		       GRPH_PFLIP_INT_CLEAR);
7307 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7308 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7309 		       GRPH_PFLIP_INT_CLEAR);
7310 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7311 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7312 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7313 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7314 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7315 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7316 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7317 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7318 
7319 	if (rdev->num_crtc >= 4) {
7320 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7321 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7322 			       GRPH_PFLIP_INT_CLEAR);
7323 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7324 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7325 			       GRPH_PFLIP_INT_CLEAR);
7326 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7327 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7328 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7329 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7330 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7331 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7332 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7333 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7334 	}
7335 
7336 	if (rdev->num_crtc >= 6) {
7337 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7338 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7339 			       GRPH_PFLIP_INT_CLEAR);
7340 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7341 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7342 			       GRPH_PFLIP_INT_CLEAR);
7343 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7344 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7345 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7346 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7347 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7348 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7349 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7350 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7351 	}
7352 
7353 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7354 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7355 		tmp |= DC_HPDx_INT_ACK;
7356 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7357 	}
7358 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7359 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7360 		tmp |= DC_HPDx_INT_ACK;
7361 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7362 	}
7363 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7364 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7365 		tmp |= DC_HPDx_INT_ACK;
7366 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7367 	}
7368 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7369 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7370 		tmp |= DC_HPDx_INT_ACK;
7371 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7372 	}
7373 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7374 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7375 		tmp |= DC_HPDx_INT_ACK;
7376 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7377 	}
7378 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7379 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7380 		tmp |= DC_HPDx_INT_ACK;
7381 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7382 	}
7383 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7384 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7385 		tmp |= DC_HPDx_RX_INT_ACK;
7386 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7387 	}
7388 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7389 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7390 		tmp |= DC_HPDx_RX_INT_ACK;
7391 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7392 	}
7393 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7394 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7395 		tmp |= DC_HPDx_RX_INT_ACK;
7396 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7397 	}
7398 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7399 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7400 		tmp |= DC_HPDx_RX_INT_ACK;
7401 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7402 	}
7403 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7404 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7405 		tmp |= DC_HPDx_RX_INT_ACK;
7406 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7407 	}
7408 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7409 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7410 		tmp |= DC_HPDx_RX_INT_ACK;
7411 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7412 	}
7413 }
7414 
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 * Calls cik_disable_interrupts(), waits 1 ms, acknowledges whatever is
 * still pending through cik_irq_ack() and finally calls
 * cik_disable_interrupt_state().
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7430 
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts are disabled before the RLC is stopped */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7444 
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* same quiesce sequence as suspend (irqs off, RLC stopped) ... */
	cik_irq_suspend(rdev);
	/* ... followed by IH ring teardown */
	r600_ih_ring_fini(rdev);
}
7459 
7460 /**
7461  * cik_get_ih_wptr - get the IH ring buffer wptr
7462  *
7463  * @rdev: radeon_device pointer
7464  *
7465  * Get the IH ring buffer wptr from either the register
7466  * or the writeback memory buffer (CIK).  Also check for
7467  * ring buffer overflow and deal with it.
7468  * Used by cik_irq_process().
7469  * Returns the value of the wptr.
7470  */
7471 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7472 {
7473 	u32 wptr, tmp;
7474 
7475 	if (rdev->wb.enabled)
7476 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7477 	else
7478 		wptr = RREG32(IH_RB_WPTR);
7479 
7480 	if (wptr & RB_OVERFLOW) {
7481 		wptr &= ~RB_OVERFLOW;
7482 		/* When a ring buffer overflow happen start parsing interrupt
7483 		 * from the last not overwritten vector (wptr + 16). Hopefully
7484 		 * this should allow us to catchup.
7485 		 */
7486 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7487 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7488 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7489 		tmp = RREG32(IH_RB_CNTL);
7490 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7491 		WREG32(IH_RB_CNTL, tmp);
7492 	}
7493 	return (wptr & rdev->ih.ptr_mask);
7494 }
7495 
7496 /*        CIK IV Ring
7497  * Each IV ring entry is 128 bits:
7498  * [7:0]    - interrupt source id
7499  * [31:8]   - reserved
7500  * [59:32]  - interrupt source data
7501  * [63:60]  - reserved
7502  * [71:64]  - RINGID
7503  *            CP:
7504  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7505  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7506  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7507  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7508  *            PIPE_ID - ME0 0=3D
7509  *                    - ME1&2 compute dispatcher (4 pipes each)
7510  *            SDMA:
7511  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7512  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7513  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7514  * [79:72]  - VMID
7515  * [95:80]  - PASID
7516  * [127:96] - reserved
7517  */
7518 /**
7519  * cik_irq_process - interrupt handler
7520  *
7521  * @rdev: radeon_device pointer
7522  *
7523  * Interrupt hander (CIK).  Walk the IH ring,
7524  * ack interrupts and schedule work to handle
7525  * interrupt events.
7526  * Returns irq process return code.
7527  */
7528 int cik_irq_process(struct radeon_device *rdev)
7529 {
7530 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7531 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7532 	u32 wptr;
7533 	u32 rptr;
7534 	u32 src_id, src_data, ring_id;
7535 	u8 me_id, pipe_id, queue_id;
7536 	u32 ring_index;
7537 	bool queue_hotplug = false;
7538 	bool queue_dp = false;
7539 	bool queue_reset = false;
7540 	u32 addr, status, mc_client;
7541 	bool queue_thermal = false;
7542 
7543 	if (!rdev->ih.enabled || rdev->shutdown)
7544 		return IRQ_NONE;
7545 
7546 	wptr = cik_get_ih_wptr(rdev);
7547 
7548 restart_ih:
7549 	/* is somebody else already processing irqs? */
7550 	if (atomic_xchg(&rdev->ih.lock, 1))
7551 		return IRQ_NONE;
7552 
7553 	rptr = rdev->ih.rptr;
7554 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7555 
7556 	/* Order reading of wptr vs. reading of IH ring data */
7557 	rmb();
7558 
7559 	/* display interrupts */
7560 	cik_irq_ack(rdev);
7561 
7562 	while (rptr != wptr) {
7563 		/* wptr/rptr are in bytes! */
7564 		ring_index = rptr / 4;
7565 
7566 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7567 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7568 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7569 
7570 		switch (src_id) {
7571 		case 1: /* D1 vblank/vline */
7572 			switch (src_data) {
7573 			case 0: /* D1 vblank */
7574 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7575 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7576 
7577 				if (rdev->irq.crtc_vblank_int[0]) {
7578 					drm_handle_vblank(rdev->ddev, 0);
7579 					rdev->pm.vblank_sync = true;
7580 					wake_up(&rdev->irq.vblank_queue);
7581 				}
7582 				if (atomic_read(&rdev->irq.pflip[0]))
7583 					radeon_crtc_handle_vblank(rdev, 0);
7584 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7585 				DRM_DEBUG("IH: D1 vblank\n");
7586 
7587 				break;
7588 			case 1: /* D1 vline */
7589 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7590 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7591 
7592 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7593 				DRM_DEBUG("IH: D1 vline\n");
7594 
7595 				break;
7596 			default:
7597 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7598 				break;
7599 			}
7600 			break;
7601 		case 2: /* D2 vblank/vline */
7602 			switch (src_data) {
7603 			case 0: /* D2 vblank */
7604 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7605 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7606 
7607 				if (rdev->irq.crtc_vblank_int[1]) {
7608 					drm_handle_vblank(rdev->ddev, 1);
7609 					rdev->pm.vblank_sync = true;
7610 					wake_up(&rdev->irq.vblank_queue);
7611 				}
7612 				if (atomic_read(&rdev->irq.pflip[1]))
7613 					radeon_crtc_handle_vblank(rdev, 1);
7614 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7615 				DRM_DEBUG("IH: D2 vblank\n");
7616 
7617 				break;
7618 			case 1: /* D2 vline */
7619 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7620 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7621 
7622 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7623 				DRM_DEBUG("IH: D2 vline\n");
7624 
7625 				break;
7626 			default:
7627 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7628 				break;
7629 			}
7630 			break;
7631 		case 3: /* D3 vblank/vline */
7632 			switch (src_data) {
7633 			case 0: /* D3 vblank */
7634 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7635 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7636 
7637 				if (rdev->irq.crtc_vblank_int[2]) {
7638 					drm_handle_vblank(rdev->ddev, 2);
7639 					rdev->pm.vblank_sync = true;
7640 					wake_up(&rdev->irq.vblank_queue);
7641 				}
7642 				if (atomic_read(&rdev->irq.pflip[2]))
7643 					radeon_crtc_handle_vblank(rdev, 2);
7644 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7645 				DRM_DEBUG("IH: D3 vblank\n");
7646 
7647 				break;
7648 			case 1: /* D3 vline */
7649 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7650 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7651 
7652 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7653 				DRM_DEBUG("IH: D3 vline\n");
7654 
7655 				break;
7656 			default:
7657 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7658 				break;
7659 			}
7660 			break;
7661 		case 4: /* D4 vblank/vline */
7662 			switch (src_data) {
7663 			case 0: /* D4 vblank */
7664 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7665 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7666 
7667 				if (rdev->irq.crtc_vblank_int[3]) {
7668 					drm_handle_vblank(rdev->ddev, 3);
7669 					rdev->pm.vblank_sync = true;
7670 					wake_up(&rdev->irq.vblank_queue);
7671 				}
7672 				if (atomic_read(&rdev->irq.pflip[3]))
7673 					radeon_crtc_handle_vblank(rdev, 3);
7674 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7675 				DRM_DEBUG("IH: D4 vblank\n");
7676 
7677 				break;
7678 			case 1: /* D4 vline */
7679 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7680 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7681 
7682 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7683 				DRM_DEBUG("IH: D4 vline\n");
7684 
7685 				break;
7686 			default:
7687 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7688 				break;
7689 			}
7690 			break;
7691 		case 5: /* D5 vblank/vline */
7692 			switch (src_data) {
7693 			case 0: /* D5 vblank */
7694 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7695 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7696 
7697 				if (rdev->irq.crtc_vblank_int[4]) {
7698 					drm_handle_vblank(rdev->ddev, 4);
7699 					rdev->pm.vblank_sync = true;
7700 					wake_up(&rdev->irq.vblank_queue);
7701 				}
7702 				if (atomic_read(&rdev->irq.pflip[4]))
7703 					radeon_crtc_handle_vblank(rdev, 4);
7704 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7705 				DRM_DEBUG("IH: D5 vblank\n");
7706 
7707 				break;
7708 			case 1: /* D5 vline */
7709 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7710 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7711 
7712 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7713 				DRM_DEBUG("IH: D5 vline\n");
7714 
7715 				break;
7716 			default:
7717 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7718 				break;
7719 			}
7720 			break;
7721 		case 6: /* D6 vblank/vline */
7722 			switch (src_data) {
7723 			case 0: /* D6 vblank */
7724 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7725 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7726 
7727 				if (rdev->irq.crtc_vblank_int[5]) {
7728 					drm_handle_vblank(rdev->ddev, 5);
7729 					rdev->pm.vblank_sync = true;
7730 					wake_up(&rdev->irq.vblank_queue);
7731 				}
7732 				if (atomic_read(&rdev->irq.pflip[5]))
7733 					radeon_crtc_handle_vblank(rdev, 5);
7734 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7735 				DRM_DEBUG("IH: D6 vblank\n");
7736 
7737 				break;
7738 			case 1: /* D6 vline */
7739 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7740 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7741 
7742 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7743 				DRM_DEBUG("IH: D6 vline\n");
7744 
7745 				break;
7746 			default:
7747 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7748 				break;
7749 			}
7750 			break;
7751 		case 8: /* D1 page flip */
7752 		case 10: /* D2 page flip */
7753 		case 12: /* D3 page flip */
7754 		case 14: /* D4 page flip */
7755 		case 16: /* D5 page flip */
7756 		case 18: /* D6 page flip */
7757 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7758 			if (radeon_use_pflipirq > 0)
7759 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7760 			break;
7761 		case 42: /* HPD hotplug */
7762 			switch (src_data) {
7763 			case 0:
7764 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7765 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7766 
7767 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7768 				queue_hotplug = true;
7769 				DRM_DEBUG("IH: HPD1\n");
7770 
7771 				break;
7772 			case 1:
7773 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7774 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7775 
7776 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7777 				queue_hotplug = true;
7778 				DRM_DEBUG("IH: HPD2\n");
7779 
7780 				break;
7781 			case 2:
7782 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7783 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7784 
7785 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7786 				queue_hotplug = true;
7787 				DRM_DEBUG("IH: HPD3\n");
7788 
7789 				break;
7790 			case 3:
7791 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7792 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7793 
7794 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7795 				queue_hotplug = true;
7796 				DRM_DEBUG("IH: HPD4\n");
7797 
7798 				break;
7799 			case 4:
7800 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7801 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7802 
7803 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7804 				queue_hotplug = true;
7805 				DRM_DEBUG("IH: HPD5\n");
7806 
7807 				break;
7808 			case 5:
7809 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7810 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7811 
7812 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7813 				queue_hotplug = true;
7814 				DRM_DEBUG("IH: HPD6\n");
7815 
7816 				break;
7817 			case 6:
7818 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7819 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7820 
7821 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7822 				queue_dp = true;
7823 				DRM_DEBUG("IH: HPD_RX 1\n");
7824 
7825 				break;
7826 			case 7:
7827 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7828 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7829 
7830 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7831 				queue_dp = true;
7832 				DRM_DEBUG("IH: HPD_RX 2\n");
7833 
7834 				break;
7835 			case 8:
7836 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7837 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7838 
7839 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7840 				queue_dp = true;
7841 				DRM_DEBUG("IH: HPD_RX 3\n");
7842 
7843 				break;
7844 			case 9:
7845 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7846 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7847 
7848 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7849 				queue_dp = true;
7850 				DRM_DEBUG("IH: HPD_RX 4\n");
7851 
7852 				break;
7853 			case 10:
7854 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7855 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7856 
7857 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7858 				queue_dp = true;
7859 				DRM_DEBUG("IH: HPD_RX 5\n");
7860 
7861 				break;
7862 			case 11:
7863 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7864 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7865 
7866 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7867 				queue_dp = true;
7868 				DRM_DEBUG("IH: HPD_RX 6\n");
7869 
7870 				break;
7871 			default:
7872 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7873 				break;
7874 			}
7875 			break;
7876 		case 96:
7877 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7878 			WREG32(SRBM_INT_ACK, 0x1);
7879 			break;
7880 		case 124: /* UVD */
7881 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7882 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7883 			break;
7884 		case 146:
7885 		case 147:
7886 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7887 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7888 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7889 			/* reset addr and status */
7890 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7891 			if (addr == 0x0 && status == 0x0)
7892 				break;
7893 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7894 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7895 				addr);
7896 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7897 				status);
7898 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7899 			break;
7900 		case 167: /* VCE */
7901 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7902 			switch (src_data) {
7903 			case 0:
7904 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7905 				break;
7906 			case 1:
7907 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7908 				break;
7909 			default:
7910 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7911 				break;
7912 			}
7913 			break;
7914 		case 176: /* GFX RB CP_INT */
7915 		case 177: /* GFX IB CP_INT */
7916 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7917 			break;
7918 		case 181: /* CP EOP event */
7919 			DRM_DEBUG("IH: CP EOP\n");
7920 			/* XXX check the bitfield order! */
7921 			me_id = (ring_id & 0x60) >> 5;
7922 			pipe_id = (ring_id & 0x18) >> 3;
7923 			queue_id = (ring_id & 0x7) >> 0;
7924 			switch (me_id) {
7925 			case 0:
7926 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7927 				break;
7928 			case 1:
7929 			case 2:
7930 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7931 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7932 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7933 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7934 				break;
7935 			}
7936 			break;
7937 		case 184: /* CP Privileged reg access */
7938 			DRM_ERROR("Illegal register access in command stream\n");
7939 			/* XXX check the bitfield order! */
7940 			me_id = (ring_id & 0x60) >> 5;
7941 			pipe_id = (ring_id & 0x18) >> 3;
7942 			queue_id = (ring_id & 0x7) >> 0;
7943 			switch (me_id) {
7944 			case 0:
7945 				/* This results in a full GPU reset, but all we need to do is soft
7946 				 * reset the CP for gfx
7947 				 */
7948 				queue_reset = true;
7949 				break;
7950 			case 1:
7951 				/* XXX compute */
7952 				queue_reset = true;
7953 				break;
7954 			case 2:
7955 				/* XXX compute */
7956 				queue_reset = true;
7957 				break;
7958 			}
7959 			break;
7960 		case 185: /* CP Privileged inst */
7961 			DRM_ERROR("Illegal instruction in command stream\n");
7962 			/* XXX check the bitfield order! */
7963 			me_id = (ring_id & 0x60) >> 5;
7964 			pipe_id = (ring_id & 0x18) >> 3;
7965 			queue_id = (ring_id & 0x7) >> 0;
7966 			switch (me_id) {
7967 			case 0:
7968 				/* This results in a full GPU reset, but all we need to do is soft
7969 				 * reset the CP for gfx
7970 				 */
7971 				queue_reset = true;
7972 				break;
7973 			case 1:
7974 				/* XXX compute */
7975 				queue_reset = true;
7976 				break;
7977 			case 2:
7978 				/* XXX compute */
7979 				queue_reset = true;
7980 				break;
7981 			}
7982 			break;
7983 		case 224: /* SDMA trap event */
7984 			/* XXX check the bitfield order! */
7985 			me_id = (ring_id & 0x3) >> 0;
7986 			queue_id = (ring_id & 0xc) >> 2;
7987 			DRM_DEBUG("IH: SDMA trap\n");
7988 			switch (me_id) {
7989 			case 0:
7990 				switch (queue_id) {
7991 				case 0:
7992 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7993 					break;
7994 				case 1:
7995 					/* XXX compute */
7996 					break;
7997 				case 2:
7998 					/* XXX compute */
7999 					break;
8000 				}
8001 				break;
8002 			case 1:
8003 				switch (queue_id) {
8004 				case 0:
8005 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8006 					break;
8007 				case 1:
8008 					/* XXX compute */
8009 					break;
8010 				case 2:
8011 					/* XXX compute */
8012 					break;
8013 				}
8014 				break;
8015 			}
8016 			break;
8017 		case 230: /* thermal low to high */
8018 			DRM_DEBUG("IH: thermal low to high\n");
8019 			rdev->pm.dpm.thermal.high_to_low = false;
8020 			queue_thermal = true;
8021 			break;
8022 		case 231: /* thermal high to low */
8023 			DRM_DEBUG("IH: thermal high to low\n");
8024 			rdev->pm.dpm.thermal.high_to_low = true;
8025 			queue_thermal = true;
8026 			break;
8027 		case 233: /* GUI IDLE */
8028 			DRM_DEBUG("IH: GUI idle\n");
8029 			break;
8030 		case 241: /* SDMA Privileged inst */
8031 		case 247: /* SDMA Privileged inst */
8032 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8033 			/* XXX check the bitfield order! */
8034 			me_id = (ring_id & 0x3) >> 0;
8035 			queue_id = (ring_id & 0xc) >> 2;
8036 			switch (me_id) {
8037 			case 0:
8038 				switch (queue_id) {
8039 				case 0:
8040 					queue_reset = true;
8041 					break;
8042 				case 1:
8043 					/* XXX compute */
8044 					queue_reset = true;
8045 					break;
8046 				case 2:
8047 					/* XXX compute */
8048 					queue_reset = true;
8049 					break;
8050 				}
8051 				break;
8052 			case 1:
8053 				switch (queue_id) {
8054 				case 0:
8055 					queue_reset = true;
8056 					break;
8057 				case 1:
8058 					/* XXX compute */
8059 					queue_reset = true;
8060 					break;
8061 				case 2:
8062 					/* XXX compute */
8063 					queue_reset = true;
8064 					break;
8065 				}
8066 				break;
8067 			}
8068 			break;
8069 		default:
8070 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8071 			break;
8072 		}
8073 
8074 		/* wptr/rptr are in bytes! */
8075 		rptr += 16;
8076 		rptr &= rdev->ih.ptr_mask;
8077 		WREG32(IH_RB_RPTR, rptr);
8078 	}
8079 	if (queue_dp)
8080 		schedule_work(&rdev->dp_work);
8081 	if (queue_hotplug)
8082 		schedule_delayed_work(&rdev->hotplug_work, 0);
8083 	if (queue_reset) {
8084 		rdev->needs_reset = true;
8085 		wake_up_all(&rdev->fence_queue);
8086 	}
8087 	if (queue_thermal)
8088 		schedule_work(&rdev->pm.dpm.thermal.work);
8089 	rdev->ih.rptr = rptr;
8090 	atomic_set(&rdev->ih.lock, 0);
8091 
8092 	/* make sure wptr hasn't changed while processing */
8093 	wptr = cik_get_ih_wptr(rdev);
8094 	if (wptr != rptr)
8095 		goto restart_ih;
8096 
8097 	return IRQ_HANDLED;
8098 }
8099 
8100 /*
8101  * startup/shutdown callbacks
8102  */
8103 static void cik_uvd_init(struct radeon_device *rdev)
8104 {
8105 	int r;
8106 
8107 	if (!rdev->has_uvd)
8108 		return;
8109 
8110 	r = radeon_uvd_init(rdev);
8111 	if (r) {
8112 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8113 		/*
8114 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8115 		 * to early fails cik_uvd_start() and thus nothing happens
8116 		 * there. So it is pointless to try to go through that code
8117 		 * hence why we disable uvd here.
8118 		 */
8119 		rdev->has_uvd = 0;
8120 		return;
8121 	}
8122 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8123 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8124 }
8125 
8126 static void cik_uvd_start(struct radeon_device *rdev)
8127 {
8128 	int r;
8129 
8130 	if (!rdev->has_uvd)
8131 		return;
8132 
8133 	r = radeon_uvd_resume(rdev);
8134 	if (r) {
8135 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8136 		goto error;
8137 	}
8138 	r = uvd_v4_2_resume(rdev);
8139 	if (r) {
8140 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8141 		goto error;
8142 	}
8143 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8144 	if (r) {
8145 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8146 		goto error;
8147 	}
8148 	return;
8149 
8150 error:
8151 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8152 }
8153 
8154 static void cik_uvd_resume(struct radeon_device *rdev)
8155 {
8156 	struct radeon_ring *ring;
8157 	int r;
8158 
8159 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8160 		return;
8161 
8162 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8163 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8164 	if (r) {
8165 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8166 		return;
8167 	}
8168 	r = uvd_v1_0_init(rdev);
8169 	if (r) {
8170 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8171 		return;
8172 	}
8173 }
8174 
8175 static void cik_vce_init(struct radeon_device *rdev)
8176 {
8177 	int r;
8178 
8179 	if (!rdev->has_vce)
8180 		return;
8181 
8182 	r = radeon_vce_init(rdev);
8183 	if (r) {
8184 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8185 		/*
8186 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8187 		 * to early fails cik_vce_start() and thus nothing happens
8188 		 * there. So it is pointless to try to go through that code
8189 		 * hence why we disable vce here.
8190 		 */
8191 		rdev->has_vce = 0;
8192 		return;
8193 	}
8194 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8195 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8196 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8197 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8198 }
8199 
8200 static void cik_vce_start(struct radeon_device *rdev)
8201 {
8202 	int r;
8203 
8204 	if (!rdev->has_vce)
8205 		return;
8206 
8207 	r = radeon_vce_resume(rdev);
8208 	if (r) {
8209 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8210 		goto error;
8211 	}
8212 	r = vce_v2_0_resume(rdev);
8213 	if (r) {
8214 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8215 		goto error;
8216 	}
8217 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8218 	if (r) {
8219 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8220 		goto error;
8221 	}
8222 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8223 	if (r) {
8224 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8225 		goto error;
8226 	}
8227 	return;
8228 
8229 error:
8230 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8231 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8232 }
8233 
8234 static void cik_vce_resume(struct radeon_device *rdev)
8235 {
8236 	struct radeon_ring *ring;
8237 	int r;
8238 
8239 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8240 		return;
8241 
8242 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8243 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8244 	if (r) {
8245 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8246 		return;
8247 	}
8248 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8249 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8250 	if (r) {
8251 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8252 		return;
8253 	}
8254 	r = vce_v1_0_init(rdev);
8255 	if (r) {
8256 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8257 		return;
8258 	}
8259 }
8260 
8261 /**
8262  * cik_startup - program the asic to a functional state
8263  *
8264  * @rdev: radeon_device pointer
8265  *
8266  * Programs the asic to a functional state (CIK).
8267  * Called by cik_init() and cik_resume().
8268  * Returns 0 for success, error for failure.
8269  */
8270 static int cik_startup(struct radeon_device *rdev)
8271 {
8272 	struct radeon_ring *ring;
8273 	u32 nop;
8274 	int r;
8275 
8276 	/* enable pcie gen2/3 link */
8277 	cik_pcie_gen3_enable(rdev);
8278 	/* enable aspm */
8279 	cik_program_aspm(rdev);
8280 
8281 	/* scratch needs to be initialized before MC */
8282 	r = r600_vram_scratch_init(rdev);
8283 	if (r)
8284 		return r;
8285 
8286 	cik_mc_program(rdev);
8287 
8288 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8289 		r = ci_mc_load_microcode(rdev);
8290 		if (r) {
8291 			DRM_ERROR("Failed to load MC firmware!\n");
8292 			return r;
8293 		}
8294 	}
8295 
8296 	r = cik_pcie_gart_enable(rdev);
8297 	if (r)
8298 		return r;
8299 	cik_gpu_init(rdev);
8300 
8301 	/* allocate rlc buffers */
8302 	if (rdev->flags & RADEON_IS_IGP) {
8303 		if (rdev->family == CHIP_KAVERI) {
8304 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8305 			rdev->rlc.reg_list_size =
8306 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8307 		} else {
8308 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8309 			rdev->rlc.reg_list_size =
8310 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8311 		}
8312 	}
8313 	rdev->rlc.cs_data = ci_cs_data;
8314 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8315 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8316 	r = sumo_rlc_init(rdev);
8317 	if (r) {
8318 		DRM_ERROR("Failed to init rlc BOs!\n");
8319 		return r;
8320 	}
8321 
8322 	/* allocate wb buffer */
8323 	r = radeon_wb_init(rdev);
8324 	if (r)
8325 		return r;
8326 
8327 	/* allocate mec buffers */
8328 	r = cik_mec_init(rdev);
8329 	if (r) {
8330 		DRM_ERROR("Failed to init MEC BOs!\n");
8331 		return r;
8332 	}
8333 
8334 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8335 	if (r) {
8336 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8337 		return r;
8338 	}
8339 
8340 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8341 	if (r) {
8342 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8343 		return r;
8344 	}
8345 
8346 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8347 	if (r) {
8348 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8349 		return r;
8350 	}
8351 
8352 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8353 	if (r) {
8354 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8355 		return r;
8356 	}
8357 
8358 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8359 	if (r) {
8360 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8361 		return r;
8362 	}
8363 
8364 	cik_uvd_start(rdev);
8365 	cik_vce_start(rdev);
8366 
8367 	/* Enable IRQ */
8368 	if (!rdev->irq.installed) {
8369 		r = radeon_irq_kms_init(rdev);
8370 		if (r)
8371 			return r;
8372 	}
8373 
8374 	r = cik_irq_init(rdev);
8375 	if (r) {
8376 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8377 		radeon_irq_kms_fini(rdev);
8378 		return r;
8379 	}
8380 	cik_irq_set(rdev);
8381 
8382 	if (rdev->family == CHIP_HAWAII) {
8383 		if (rdev->new_fw)
8384 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8385 		else
8386 			nop = RADEON_CP_PACKET2;
8387 	} else {
8388 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8389 	}
8390 
8391 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8392 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8393 			     nop);
8394 	if (r)
8395 		return r;
8396 
8397 	/* set up the compute queues */
8398 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8399 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8400 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8401 			     nop);
8402 	if (r)
8403 		return r;
8404 	ring->me = 1; /* first MEC */
8405 	ring->pipe = 0; /* first pipe */
8406 	ring->queue = 0; /* first queue */
8407 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8408 
8409 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8410 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8411 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8412 			     nop);
8413 	if (r)
8414 		return r;
8415 	/* dGPU only have 1 MEC */
8416 	ring->me = 1; /* first MEC */
8417 	ring->pipe = 0; /* first pipe */
8418 	ring->queue = 1; /* second queue */
8419 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8420 
8421 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8422 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8423 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8424 	if (r)
8425 		return r;
8426 
8427 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8428 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8429 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8430 	if (r)
8431 		return r;
8432 
8433 	r = cik_cp_resume(rdev);
8434 	if (r)
8435 		return r;
8436 
8437 	r = cik_sdma_resume(rdev);
8438 	if (r)
8439 		return r;
8440 
8441 	cik_uvd_resume(rdev);
8442 	cik_vce_resume(rdev);
8443 
8444 	r = radeon_ib_pool_init(rdev);
8445 	if (r) {
8446 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8447 		return r;
8448 	}
8449 
8450 	r = radeon_vm_manager_init(rdev);
8451 	if (r) {
8452 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8453 		return r;
8454 	}
8455 
8456 	r = radeon_audio_init(rdev);
8457 	if (r)
8458 		return r;
8459 
8460 	return 0;
8461 }
8462 
8463 /**
8464  * cik_resume - resume the asic to a functional state
8465  *
8466  * @rdev: radeon_device pointer
8467  *
8468  * Programs the asic to a functional state (CIK).
8469  * Called at resume.
8470  * Returns 0 for success, error for failure.
8471  */
8472 int cik_resume(struct radeon_device *rdev)
8473 {
8474 	int r;
8475 
8476 	/* post card */
8477 	atom_asic_init(rdev->mode_info.atom_context);
8478 
8479 	/* init golden registers */
8480 	cik_init_golden_registers(rdev);
8481 
8482 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8483 		radeon_pm_resume(rdev);
8484 
8485 	rdev->accel_working = true;
8486 	r = cik_startup(rdev);
8487 	if (r) {
8488 		DRM_ERROR("cik startup failed on resume\n");
8489 		rdev->accel_working = false;
8490 		return r;
8491 	}
8492 
8493 	return r;
8494 
8495 }
8496 
8497 /**
8498  * cik_suspend - suspend the asic
8499  *
8500  * @rdev: radeon_device pointer
8501  *
8502  * Bring the chip into a state suitable for suspend (CIK).
8503  * Called at suspend.
8504  * Returns 0 for success.
8505  */
8506 int cik_suspend(struct radeon_device *rdev)
8507 {
8508 	radeon_pm_suspend(rdev);
8509 	radeon_audio_fini(rdev);
8510 	radeon_vm_manager_fini(rdev);
8511 	cik_cp_enable(rdev, false);
8512 	cik_sdma_enable(rdev, false);
8513 	if (rdev->has_uvd) {
8514 		uvd_v1_0_fini(rdev);
8515 		radeon_uvd_suspend(rdev);
8516 	}
8517 	if (rdev->has_vce)
8518 		radeon_vce_suspend(rdev);
8519 	cik_fini_pg(rdev);
8520 	cik_fini_cg(rdev);
8521 	cik_irq_suspend(rdev);
8522 	radeon_wb_disable(rdev);
8523 	cik_pcie_gart_disable(rdev);
8524 	return 0;
8525 }
8526 
/* The plan is to move initialization into this function and use
 * helper functions, so that radeon_device_init does little more
 * than call the asic-specific function. This should also allow
 * removing a number of callback functions such as vram_info.
 */
8533 /**
8534  * cik_init - asic specific driver and hw init
8535  *
8536  * @rdev: radeon_device pointer
8537  *
8538  * Setup asic specific driver variables and program the hw
8539  * to a functional state (CIK).
8540  * Called at driver startup.
8541  * Returns 0 for success, errors for failure.
8542  */
8543 int cik_init(struct radeon_device *rdev)
8544 {
8545 	struct radeon_ring *ring;
8546 	int r;
8547 
8548 	/* Read BIOS */
8549 	if (!radeon_get_bios(rdev)) {
8550 		if (ASIC_IS_AVIVO(rdev))
8551 			return -EINVAL;
8552 	}
8553 	/* Must be an ATOMBIOS */
8554 	if (!rdev->is_atom_bios) {
8555 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8556 		return -EINVAL;
8557 	}
8558 	r = radeon_atombios_init(rdev);
8559 	if (r)
8560 		return r;
8561 
8562 	/* Post card if necessary */
8563 	if (!radeon_card_posted(rdev)) {
8564 		if (!rdev->bios) {
8565 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8566 			return -EINVAL;
8567 		}
8568 		DRM_INFO("GPU not posted. posting now...\n");
8569 		atom_asic_init(rdev->mode_info.atom_context);
8570 	}
8571 	/* init golden registers */
8572 	cik_init_golden_registers(rdev);
8573 	/* Initialize scratch registers */
8574 	cik_scratch_init(rdev);
8575 	/* Initialize surface registers */
8576 	radeon_surface_init(rdev);
8577 	/* Initialize clocks */
8578 	radeon_get_clock_info(rdev->ddev);
8579 
8580 	/* Fence driver */
8581 	r = radeon_fence_driver_init(rdev);
8582 	if (r)
8583 		return r;
8584 
8585 	/* initialize memory controller */
8586 	r = cik_mc_init(rdev);
8587 	if (r)
8588 		return r;
8589 	/* Memory manager */
8590 	r = radeon_bo_init(rdev);
8591 	if (r)
8592 		return r;
8593 
8594 	if (rdev->flags & RADEON_IS_IGP) {
8595 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8596 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8597 			r = cik_init_microcode(rdev);
8598 			if (r) {
8599 				DRM_ERROR("Failed to load firmware!\n");
8600 				return r;
8601 			}
8602 		}
8603 	} else {
8604 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8605 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8606 		    !rdev->mc_fw) {
8607 			r = cik_init_microcode(rdev);
8608 			if (r) {
8609 				DRM_ERROR("Failed to load firmware!\n");
8610 				return r;
8611 			}
8612 		}
8613 	}
8614 
8615 	/* Initialize power management */
8616 	radeon_pm_init(rdev);
8617 
8618 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8619 	ring->ring_obj = NULL;
8620 	r600_ring_init(rdev, ring, 1024 * 1024);
8621 
8622 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8623 	ring->ring_obj = NULL;
8624 	r600_ring_init(rdev, ring, 1024 * 1024);
8625 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8626 	if (r)
8627 		return r;
8628 
8629 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8630 	ring->ring_obj = NULL;
8631 	r600_ring_init(rdev, ring, 1024 * 1024);
8632 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8633 	if (r)
8634 		return r;
8635 
8636 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8637 	ring->ring_obj = NULL;
8638 	r600_ring_init(rdev, ring, 256 * 1024);
8639 
8640 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8641 	ring->ring_obj = NULL;
8642 	r600_ring_init(rdev, ring, 256 * 1024);
8643 
8644 	cik_uvd_init(rdev);
8645 	cik_vce_init(rdev);
8646 
8647 	rdev->ih.ring_obj = NULL;
8648 	r600_ih_ring_init(rdev, 64 * 1024);
8649 
8650 	r = r600_pcie_gart_init(rdev);
8651 	if (r)
8652 		return r;
8653 
8654 	rdev->accel_working = true;
8655 	r = cik_startup(rdev);
8656 	if (r) {
8657 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8658 		cik_cp_fini(rdev);
8659 		cik_sdma_fini(rdev);
8660 		cik_irq_fini(rdev);
8661 		sumo_rlc_fini(rdev);
8662 		cik_mec_fini(rdev);
8663 		radeon_wb_fini(rdev);
8664 		radeon_ib_pool_fini(rdev);
8665 		radeon_vm_manager_fini(rdev);
8666 		radeon_irq_kms_fini(rdev);
8667 		cik_pcie_gart_fini(rdev);
8668 		rdev->accel_working = false;
8669 	}
8670 
8671 	/* Don't start up if the MC ucode is missing.
8672 	 * The default clocks and voltages before the MC ucode
8673 	 * is loaded are not suffient for advanced operations.
8674 	 */
8675 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8676 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8677 		return -EINVAL;
8678 	}
8679 
8680 	return 0;
8681 }
8682 
8683 /**
8684  * cik_fini - asic specific driver and hw fini
8685  *
8686  * @rdev: radeon_device pointer
8687  *
8688  * Tear down the asic specific driver variables and program the hw
8689  * to an idle state (CIK).
8690  * Called at driver unload.
8691  */
8692 void cik_fini(struct radeon_device *rdev)
8693 {
8694 	radeon_pm_fini(rdev);
8695 	cik_cp_fini(rdev);
8696 	cik_sdma_fini(rdev);
8697 	cik_fini_pg(rdev);
8698 	cik_fini_cg(rdev);
8699 	cik_irq_fini(rdev);
8700 	sumo_rlc_fini(rdev);
8701 	cik_mec_fini(rdev);
8702 	radeon_wb_fini(rdev);
8703 	radeon_vm_manager_fini(rdev);
8704 	radeon_ib_pool_fini(rdev);
8705 	radeon_irq_kms_fini(rdev);
8706 	uvd_v1_0_fini(rdev);
8707 	radeon_uvd_fini(rdev);
8708 	radeon_vce_fini(rdev);
8709 	cik_pcie_gart_fini(rdev);
8710 	r600_vram_scratch_fini(rdev);
8711 	radeon_gem_fini(rdev);
8712 	radeon_fence_driver_fini(rdev);
8713 	radeon_bo_fini(rdev);
8714 	radeon_atombios_fini(rdev);
8715 	kfree(rdev->bios);
8716 	rdev->bios = NULL;
8717 }
8718 
8719 void dce8_program_fmt(struct drm_encoder *encoder)
8720 {
8721 	struct drm_device *dev = encoder->dev;
8722 	struct radeon_device *rdev = dev->dev_private;
8723 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8724 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8725 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8726 	int bpc = 0;
8727 	u32 tmp = 0;
8728 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8729 
8730 	if (connector) {
8731 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8732 		bpc = radeon_get_monitor_bpc(connector);
8733 		dither = radeon_connector->dither;
8734 	}
8735 
8736 	/* LVDS/eDP FMT is set up by atom */
8737 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8738 		return;
8739 
8740 	/* not needed for analog */
8741 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8742 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8743 		return;
8744 
8745 	if (bpc == 0)
8746 		return;
8747 
8748 	switch (bpc) {
8749 	case 6:
8750 		if (dither == RADEON_FMT_DITHER_ENABLE)
8751 			/* XXX sort out optimal dither settings */
8752 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8753 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8754 		else
8755 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8756 		break;
8757 	case 8:
8758 		if (dither == RADEON_FMT_DITHER_ENABLE)
8759 			/* XXX sort out optimal dither settings */
8760 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8761 				FMT_RGB_RANDOM_ENABLE |
8762 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8763 		else
8764 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8765 		break;
8766 	case 10:
8767 		if (dither == RADEON_FMT_DITHER_ENABLE)
8768 			/* XXX sort out optimal dither settings */
8769 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8770 				FMT_RGB_RANDOM_ENABLE |
8771 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8772 		else
8773 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8774 		break;
8775 	default:
8776 		/* not needed */
8777 		break;
8778 	}
8779 
8780 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8781 }
8782 
8783 /* display watermark setup */
8784 /**
8785  * dce8_line_buffer_adjust - Set up the line buffer
8786  *
8787  * @rdev: radeon_device pointer
8788  * @radeon_crtc: the selected display controller
8789  * @mode: the current display mode on the selected display
8790  * controller
8791  *
8792  * Setup up the line buffer allocation for
8793  * the selected display controller (CIK).
8794  * Returns the line buffer size in pixels.
8795  */
8796 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8797 				   struct radeon_crtc *radeon_crtc,
8798 				   struct drm_display_mode *mode)
8799 {
8800 	u32 tmp, buffer_alloc, i;
8801 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8802 	/*
8803 	 * Line Buffer Setup
8804 	 * There are 6 line buffers, one for each display controllers.
8805 	 * There are 3 partitions per LB. Select the number of partitions
8806 	 * to enable based on the display width.  For display widths larger
8807 	 * than 4096, you need use to use 2 display controllers and combine
8808 	 * them using the stereo blender.
8809 	 */
8810 	if (radeon_crtc->base.enabled && mode) {
8811 		if (mode->crtc_hdisplay < 1920) {
8812 			tmp = 1;
8813 			buffer_alloc = 2;
8814 		} else if (mode->crtc_hdisplay < 2560) {
8815 			tmp = 2;
8816 			buffer_alloc = 2;
8817 		} else if (mode->crtc_hdisplay < 4096) {
8818 			tmp = 0;
8819 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8820 		} else {
8821 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8822 			tmp = 0;
8823 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8824 		}
8825 	} else {
8826 		tmp = 1;
8827 		buffer_alloc = 0;
8828 	}
8829 
8830 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8831 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8832 
8833 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8834 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8835 	for (i = 0; i < rdev->usec_timeout; i++) {
8836 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8837 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8838 			break;
8839 		udelay(1);
8840 	}
8841 
8842 	if (radeon_crtc->base.enabled && mode) {
8843 		switch (tmp) {
8844 		case 0:
8845 		default:
8846 			return 4096 * 2;
8847 		case 1:
8848 			return 1920 * 2;
8849 		case 2:
8850 			return 2560 * 2;
8851 		}
8852 	}
8853 
8854 	/* controller not enabled, so no lb used */
8855 	return 0;
8856 }
8857 
8858 /**
8859  * cik_get_number_of_dram_channels - get the number of dram channels
8860  *
8861  * @rdev: radeon_device pointer
8862  *
8863  * Look up the number of video ram channels (CIK).
8864  * Used for display watermark bandwidth calculations
8865  * Returns the number of dram channels
8866  */
8867 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8868 {
8869 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8870 
8871 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8872 	case 0:
8873 	default:
8874 		return 1;
8875 	case 1:
8876 		return 2;
8877 	case 2:
8878 		return 4;
8879 	case 3:
8880 		return 8;
8881 	case 4:
8882 		return 3;
8883 	case 5:
8884 		return 6;
8885 	case 6:
8886 		return 10;
8887 	case 7:
8888 		return 12;
8889 	case 8:
8890 		return 16;
8891 	}
8892 }
8893 
8894 struct dce8_wm_params {
8895 	u32 dram_channels; /* number of dram channels */
8896 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8897 	u32 sclk;          /* engine clock in kHz */
8898 	u32 disp_clk;      /* display clock in kHz */
8899 	u32 src_width;     /* viewport width */
8900 	u32 active_time;   /* active display time in ns */
8901 	u32 blank_time;    /* blank time in ns */
8902 	bool interlaced;    /* mode is interlaced */
8903 	fixed20_12 vsc;    /* vertical scale ratio */
8904 	u32 num_heads;     /* number of active crtcs */
8905 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8906 	u32 lb_size;       /* line buffer allocated to pipe */
8907 	u32 vtaps;         /* vertical scaler taps */
8908 };
8909 
8910 /**
8911  * dce8_dram_bandwidth - get the dram bandwidth
8912  *
8913  * @wm: watermark calculation data
8914  *
8915  * Calculate the raw dram bandwidth (CIK).
8916  * Used for display watermark bandwidth calculations
8917  * Returns the dram bandwidth in MBytes/s
8918  */
8919 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8920 {
8921 	/* Calculate raw DRAM Bandwidth */
8922 	fixed20_12 dram_efficiency; /* 0.7 */
8923 	fixed20_12 yclk, dram_channels, bandwidth;
8924 	fixed20_12 a;
8925 
8926 	a.full = dfixed_const(1000);
8927 	yclk.full = dfixed_const(wm->yclk);
8928 	yclk.full = dfixed_div(yclk, a);
8929 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8930 	a.full = dfixed_const(10);
8931 	dram_efficiency.full = dfixed_const(7);
8932 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8933 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8934 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8935 
8936 	return dfixed_trunc(bandwidth);
8937 }
8938 
8939 /**
8940  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8941  *
8942  * @wm: watermark calculation data
8943  *
8944  * Calculate the dram bandwidth used for display (CIK).
8945  * Used for display watermark bandwidth calculations
8946  * Returns the dram bandwidth for display in MBytes/s
8947  */
8948 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8949 {
8950 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8951 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8952 	fixed20_12 yclk, dram_channels, bandwidth;
8953 	fixed20_12 a;
8954 
8955 	a.full = dfixed_const(1000);
8956 	yclk.full = dfixed_const(wm->yclk);
8957 	yclk.full = dfixed_div(yclk, a);
8958 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8959 	a.full = dfixed_const(10);
8960 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8961 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8962 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8963 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8964 
8965 	return dfixed_trunc(bandwidth);
8966 }
8967 
8968 /**
8969  * dce8_data_return_bandwidth - get the data return bandwidth
8970  *
8971  * @wm: watermark calculation data
8972  *
8973  * Calculate the data return bandwidth used for display (CIK).
8974  * Used for display watermark bandwidth calculations
8975  * Returns the data return bandwidth in MBytes/s
8976  */
8977 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8978 {
8979 	/* Calculate the display Data return Bandwidth */
8980 	fixed20_12 return_efficiency; /* 0.8 */
8981 	fixed20_12 sclk, bandwidth;
8982 	fixed20_12 a;
8983 
8984 	a.full = dfixed_const(1000);
8985 	sclk.full = dfixed_const(wm->sclk);
8986 	sclk.full = dfixed_div(sclk, a);
8987 	a.full = dfixed_const(10);
8988 	return_efficiency.full = dfixed_const(8);
8989 	return_efficiency.full = dfixed_div(return_efficiency, a);
8990 	a.full = dfixed_const(32);
8991 	bandwidth.full = dfixed_mul(a, sclk);
8992 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8993 
8994 	return dfixed_trunc(bandwidth);
8995 }
8996 
8997 /**
8998  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8999  *
9000  * @wm: watermark calculation data
9001  *
9002  * Calculate the dmif bandwidth used for display (CIK).
9003  * Used for display watermark bandwidth calculations
9004  * Returns the dmif bandwidth in MBytes/s
9005  */
9006 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9007 {
9008 	/* Calculate the DMIF Request Bandwidth */
9009 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9010 	fixed20_12 disp_clk, bandwidth;
9011 	fixed20_12 a, b;
9012 
9013 	a.full = dfixed_const(1000);
9014 	disp_clk.full = dfixed_const(wm->disp_clk);
9015 	disp_clk.full = dfixed_div(disp_clk, a);
9016 	a.full = dfixed_const(32);
9017 	b.full = dfixed_mul(a, disp_clk);
9018 
9019 	a.full = dfixed_const(10);
9020 	disp_clk_request_efficiency.full = dfixed_const(8);
9021 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9022 
9023 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9024 
9025 	return dfixed_trunc(bandwidth);
9026 }
9027 
9028 /**
9029  * dce8_available_bandwidth - get the min available bandwidth
9030  *
9031  * @wm: watermark calculation data
9032  *
9033  * Calculate the min available bandwidth used for display (CIK).
9034  * Used for display watermark bandwidth calculations
9035  * Returns the min available bandwidth in MBytes/s
9036  */
9037 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9038 {
9039 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9040 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9041 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9042 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9043 
9044 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9045 }
9046 
9047 /**
9048  * dce8_average_bandwidth - get the average available bandwidth
9049  *
9050  * @wm: watermark calculation data
9051  *
9052  * Calculate the average available bandwidth used for display (CIK).
9053  * Used for display watermark bandwidth calculations
9054  * Returns the average available bandwidth in MBytes/s
9055  */
9056 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9057 {
9058 	/* Calculate the display mode Average Bandwidth
9059 	 * DisplayMode should contain the source and destination dimensions,
9060 	 * timing, etc.
9061 	 */
9062 	fixed20_12 bpp;
9063 	fixed20_12 line_time;
9064 	fixed20_12 src_width;
9065 	fixed20_12 bandwidth;
9066 	fixed20_12 a;
9067 
9068 	a.full = dfixed_const(1000);
9069 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9070 	line_time.full = dfixed_div(line_time, a);
9071 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9072 	src_width.full = dfixed_const(wm->src_width);
9073 	bandwidth.full = dfixed_mul(src_width, bpp);
9074 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9075 	bandwidth.full = dfixed_div(bandwidth, line_time);
9076 
9077 	return dfixed_trunc(bandwidth);
9078 }
9079 
9080 /**
9081  * dce8_latency_watermark - get the latency watermark
9082  *
9083  * @wm: watermark calculation data
9084  *
9085  * Calculate the latency watermark (CIK).
9086  * Used for display watermark bandwidth calculations
9087  * Returns the latency watermark in ns
9088  */
9089 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9090 {
9091 	/* First calculate the latency in ns */
9092 	u32 mc_latency = 2000; /* 2000 ns. */
9093 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9094 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9095 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9096 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9097 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9098 		(wm->num_heads * cursor_line_pair_return_time);
9099 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9100 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9101 	u32 tmp, dmif_size = 12288;
9102 	fixed20_12 a, b, c;
9103 
9104 	if (wm->num_heads == 0)
9105 		return 0;
9106 
9107 	a.full = dfixed_const(2);
9108 	b.full = dfixed_const(1);
9109 	if ((wm->vsc.full > a.full) ||
9110 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9111 	    (wm->vtaps >= 5) ||
9112 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9113 		max_src_lines_per_dst_line = 4;
9114 	else
9115 		max_src_lines_per_dst_line = 2;
9116 
9117 	a.full = dfixed_const(available_bandwidth);
9118 	b.full = dfixed_const(wm->num_heads);
9119 	a.full = dfixed_div(a, b);
9120 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9121 	tmp = min(dfixed_trunc(a), tmp);
9122 
9123 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9124 
9125 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9126 	b.full = dfixed_const(1000);
9127 	c.full = dfixed_const(lb_fill_bw);
9128 	b.full = dfixed_div(c, b);
9129 	a.full = dfixed_div(a, b);
9130 	line_fill_time = dfixed_trunc(a);
9131 
9132 	if (line_fill_time < wm->active_time)
9133 		return latency;
9134 	else
9135 		return latency + (line_fill_time - wm->active_time);
9136 
9137 }
9138 
9139 /**
9140  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9141  * average and available dram bandwidth
9142  *
9143  * @wm: watermark calculation data
9144  *
9145  * Check if the display average bandwidth fits in the display
9146  * dram bandwidth (CIK).
9147  * Used for display watermark bandwidth calculations
9148  * Returns true if the display fits, false if not.
9149  */
9150 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9151 {
9152 	if (dce8_average_bandwidth(wm) <=
9153 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9154 		return true;
9155 	else
9156 		return false;
9157 }
9158 
9159 /**
9160  * dce8_average_bandwidth_vs_available_bandwidth - check
9161  * average and available bandwidth
9162  *
9163  * @wm: watermark calculation data
9164  *
9165  * Check if the display average bandwidth fits in the display
9166  * available bandwidth (CIK).
9167  * Used for display watermark bandwidth calculations
9168  * Returns true if the display fits, false if not.
9169  */
9170 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9171 {
9172 	if (dce8_average_bandwidth(wm) <=
9173 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9174 		return true;
9175 	else
9176 		return false;
9177 }
9178 
9179 /**
9180  * dce8_check_latency_hiding - check latency hiding
9181  *
9182  * @wm: watermark calculation data
9183  *
9184  * Check latency hiding (CIK).
9185  * Used for display watermark bandwidth calculations
9186  * Returns true if the display fits, false if not.
9187  */
9188 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9189 {
9190 	u32 lb_partitions = wm->lb_size / wm->src_width;
9191 	u32 line_time = wm->active_time + wm->blank_time;
9192 	u32 latency_tolerant_lines;
9193 	u32 latency_hiding;
9194 	fixed20_12 a;
9195 
9196 	a.full = dfixed_const(1);
9197 	if (wm->vsc.full > a.full)
9198 		latency_tolerant_lines = 1;
9199 	else {
9200 		if (lb_partitions <= (wm->vtaps + 1))
9201 			latency_tolerant_lines = 1;
9202 		else
9203 			latency_tolerant_lines = 2;
9204 	}
9205 
9206 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9207 
9208 	if (dce8_latency_watermark(wm) <= latency_hiding)
9209 		return true;
9210 	else
9211 		return false;
9212 }
9213 
9214 /**
9215  * dce8_program_watermarks - program display watermarks
9216  *
9217  * @rdev: radeon_device pointer
9218  * @radeon_crtc: the selected display controller
9219  * @lb_size: line buffer size
9220  * @num_heads: number of display controllers in use
9221  *
9222  * Calculate and program the display watermarks for the
9223  * selected display controller (CIK).
9224  */
9225 static void dce8_program_watermarks(struct radeon_device *rdev,
9226 				    struct radeon_crtc *radeon_crtc,
9227 				    u32 lb_size, u32 num_heads)
9228 {
9229 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9230 	struct dce8_wm_params wm_low, wm_high;
9231 	u32 active_time;
9232 	u32 line_time = 0;
9233 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9234 	u32 tmp, wm_mask;
9235 
9236 	if (radeon_crtc->base.enabled && num_heads && mode) {
9237 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9238 					    (u32)mode->clock);
9239 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9240 					  (u32)mode->clock);
9241 		line_time = min(line_time, (u32)65535);
9242 
9243 		/* watermark for high clocks */
9244 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9245 		    rdev->pm.dpm_enabled) {
9246 			wm_high.yclk =
9247 				radeon_dpm_get_mclk(rdev, false) * 10;
9248 			wm_high.sclk =
9249 				radeon_dpm_get_sclk(rdev, false) * 10;
9250 		} else {
9251 			wm_high.yclk = rdev->pm.current_mclk * 10;
9252 			wm_high.sclk = rdev->pm.current_sclk * 10;
9253 		}
9254 
9255 		wm_high.disp_clk = mode->clock;
9256 		wm_high.src_width = mode->crtc_hdisplay;
9257 		wm_high.active_time = active_time;
9258 		wm_high.blank_time = line_time - wm_high.active_time;
9259 		wm_high.interlaced = false;
9260 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9261 			wm_high.interlaced = true;
9262 		wm_high.vsc = radeon_crtc->vsc;
9263 		wm_high.vtaps = 1;
9264 		if (radeon_crtc->rmx_type != RMX_OFF)
9265 			wm_high.vtaps = 2;
9266 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9267 		wm_high.lb_size = lb_size;
9268 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9269 		wm_high.num_heads = num_heads;
9270 
9271 		/* set for high clocks */
9272 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9273 
9274 		/* possibly force display priority to high */
9275 		/* should really do this at mode validation time... */
9276 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9277 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9278 		    !dce8_check_latency_hiding(&wm_high) ||
9279 		    (rdev->disp_priority == 2)) {
9280 			DRM_DEBUG_KMS("force priority to high\n");
9281 		}
9282 
9283 		/* watermark for low clocks */
9284 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9285 		    rdev->pm.dpm_enabled) {
9286 			wm_low.yclk =
9287 				radeon_dpm_get_mclk(rdev, true) * 10;
9288 			wm_low.sclk =
9289 				radeon_dpm_get_sclk(rdev, true) * 10;
9290 		} else {
9291 			wm_low.yclk = rdev->pm.current_mclk * 10;
9292 			wm_low.sclk = rdev->pm.current_sclk * 10;
9293 		}
9294 
9295 		wm_low.disp_clk = mode->clock;
9296 		wm_low.src_width = mode->crtc_hdisplay;
9297 		wm_low.active_time = active_time;
9298 		wm_low.blank_time = line_time - wm_low.active_time;
9299 		wm_low.interlaced = false;
9300 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9301 			wm_low.interlaced = true;
9302 		wm_low.vsc = radeon_crtc->vsc;
9303 		wm_low.vtaps = 1;
9304 		if (radeon_crtc->rmx_type != RMX_OFF)
9305 			wm_low.vtaps = 2;
9306 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9307 		wm_low.lb_size = lb_size;
9308 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9309 		wm_low.num_heads = num_heads;
9310 
9311 		/* set for low clocks */
9312 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9313 
9314 		/* possibly force display priority to high */
9315 		/* should really do this at mode validation time... */
9316 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9317 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9318 		    !dce8_check_latency_hiding(&wm_low) ||
9319 		    (rdev->disp_priority == 2)) {
9320 			DRM_DEBUG_KMS("force priority to high\n");
9321 		}
9322 
9323 		/* Save number of lines the linebuffer leads before the scanout */
9324 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9325 	}
9326 
9327 	/* select wm A */
9328 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9329 	tmp = wm_mask;
9330 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9331 	tmp |= LATENCY_WATERMARK_MASK(1);
9332 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9333 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9334 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9335 		LATENCY_HIGH_WATERMARK(line_time)));
9336 	/* select wm B */
9337 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9338 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9339 	tmp |= LATENCY_WATERMARK_MASK(2);
9340 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9341 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9342 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9343 		LATENCY_HIGH_WATERMARK(line_time)));
9344 	/* restore original selection */
9345 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9346 
9347 	/* save values for DPM */
9348 	radeon_crtc->line_time = line_time;
9349 	radeon_crtc->wm_high = latency_watermark_a;
9350 	radeon_crtc->wm_low = latency_watermark_b;
9351 }
9352 
9353 /**
9354  * dce8_bandwidth_update - program display watermarks
9355  *
9356  * @rdev: radeon_device pointer
9357  *
9358  * Calculate and program the display watermarks and line
9359  * buffer allocation (CIK).
9360  */
9361 void dce8_bandwidth_update(struct radeon_device *rdev)
9362 {
9363 	struct drm_display_mode *mode = NULL;
9364 	u32 num_heads = 0, lb_size;
9365 	int i;
9366 
9367 	if (!rdev->mode_info.mode_config_initialized)
9368 		return;
9369 
9370 	radeon_update_display_priority(rdev);
9371 
9372 	for (i = 0; i < rdev->num_crtc; i++) {
9373 		if (rdev->mode_info.crtcs[i]->base.enabled)
9374 			num_heads++;
9375 	}
9376 	for (i = 0; i < rdev->num_crtc; i++) {
9377 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9378 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9379 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9380 	}
9381 }
9382 
9383 /**
9384  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9385  *
9386  * @rdev: radeon_device pointer
9387  *
9388  * Fetches a GPU clock counter snapshot (SI).
9389  * Returns the 64 bit clock counter snapshot.
9390  */
9391 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9392 {
9393 	uint64_t clock;
9394 
9395 	mutex_lock(&rdev->gpu_clock_mutex);
9396 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9397 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9398 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9399 	mutex_unlock(&rdev->gpu_clock_mutex);
9400 	return clock;
9401 }
9402 
9403 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9404 			     u32 cntl_reg, u32 status_reg)
9405 {
9406 	int r, i;
9407 	struct atom_clock_dividers dividers;
9408 	uint32_t tmp;
9409 
9410 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9411 					   clock, false, &dividers);
9412 	if (r)
9413 		return r;
9414 
9415 	tmp = RREG32_SMC(cntl_reg);
9416 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9417 	tmp |= dividers.post_divider;
9418 	WREG32_SMC(cntl_reg, tmp);
9419 
9420 	for (i = 0; i < 100; i++) {
9421 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9422 			break;
9423 		mdelay(10);
9424 	}
9425 	if (i == 100)
9426 		return -ETIMEDOUT;
9427 
9428 	return 0;
9429 }
9430 
9431 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9432 {
9433 	int r = 0;
9434 
9435 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9436 	if (r)
9437 		return r;
9438 
9439 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9440 	return r;
9441 }
9442 
9443 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9444 {
9445 	int r, i;
9446 	struct atom_clock_dividers dividers;
9447 	u32 tmp;
9448 
9449 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9450 					   ecclk, false, &dividers);
9451 	if (r)
9452 		return r;
9453 
9454 	for (i = 0; i < 100; i++) {
9455 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9456 			break;
9457 		mdelay(10);
9458 	}
9459 	if (i == 100)
9460 		return -ETIMEDOUT;
9461 
9462 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9463 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9464 	tmp |= dividers.post_divider;
9465 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9466 
9467 	for (i = 0; i < 100; i++) {
9468 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9469 			break;
9470 		mdelay(10);
9471 	}
9472 	if (i == 100)
9473 		return -ETIMEDOUT;
9474 
9475 	return 0;
9476 }
9477 
9478 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9479 {
9480 	struct pci_dev *root = rdev->pdev->bus->self;
9481 	int bridge_pos, gpu_pos;
9482 	u32 speed_cntl, mask, current_data_rate;
9483 	int ret, i;
9484 	u16 tmp16;
9485 
9486 	if (pci_is_root_bus(rdev->pdev->bus))
9487 		return;
9488 
9489 	if (radeon_pcie_gen2 == 0)
9490 		return;
9491 
9492 	if (rdev->flags & RADEON_IS_IGP)
9493 		return;
9494 
9495 	if (!(rdev->flags & RADEON_IS_PCIE))
9496 		return;
9497 
9498 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9499 	if (ret != 0)
9500 		return;
9501 
9502 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9503 		return;
9504 
9505 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9506 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9507 		LC_CURRENT_DATA_RATE_SHIFT;
9508 	if (mask & DRM_PCIE_SPEED_80) {
9509 		if (current_data_rate == 2) {
9510 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9511 			return;
9512 		}
9513 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9514 	} else if (mask & DRM_PCIE_SPEED_50) {
9515 		if (current_data_rate == 1) {
9516 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9517 			return;
9518 		}
9519 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9520 	}
9521 
9522 	bridge_pos = pci_pcie_cap(root);
9523 	if (!bridge_pos)
9524 		return;
9525 
9526 	gpu_pos = pci_pcie_cap(rdev->pdev);
9527 	if (!gpu_pos)
9528 		return;
9529 
9530 	if (mask & DRM_PCIE_SPEED_80) {
9531 		/* re-try equalization if gen3 is not already enabled */
9532 		if (current_data_rate != 2) {
9533 			u16 bridge_cfg, gpu_cfg;
9534 			u16 bridge_cfg2, gpu_cfg2;
9535 			u32 max_lw, current_lw, tmp;
9536 
9537 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9538 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9539 
9540 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9541 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9542 
9543 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9544 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9545 
9546 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9547 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9548 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9549 
9550 			if (current_lw < max_lw) {
9551 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9552 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9553 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9554 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9555 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9556 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9557 				}
9558 			}
9559 
9560 			for (i = 0; i < 10; i++) {
9561 				/* check status */
9562 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9563 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9564 					break;
9565 
9566 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9567 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9568 
9569 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9570 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9571 
9572 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9573 				tmp |= LC_SET_QUIESCE;
9574 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9575 
9576 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9577 				tmp |= LC_REDO_EQ;
9578 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9579 
9580 				mdelay(100);
9581 
9582 				/* linkctl */
9583 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9584 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9585 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9586 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9587 
9588 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9589 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9590 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9591 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9592 
9593 				/* linkctl2 */
9594 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9595 				tmp16 &= ~((1 << 4) | (7 << 9));
9596 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9597 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9598 
9599 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9600 				tmp16 &= ~((1 << 4) | (7 << 9));
9601 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9602 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9603 
9604 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9605 				tmp &= ~LC_SET_QUIESCE;
9606 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9607 			}
9608 		}
9609 	}
9610 
9611 	/* set the link speed */
9612 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9613 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9614 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9615 
9616 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9617 	tmp16 &= ~0xf;
9618 	if (mask & DRM_PCIE_SPEED_80)
9619 		tmp16 |= 3; /* gen3 */
9620 	else if (mask & DRM_PCIE_SPEED_50)
9621 		tmp16 |= 2; /* gen2 */
9622 	else
9623 		tmp16 |= 1; /* gen1 */
9624 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9625 
9626 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9627 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9628 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9629 
9630 	for (i = 0; i < rdev->usec_timeout; i++) {
9631 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9632 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9633 			break;
9634 		udelay(1);
9635 	}
9636 }
9637 
9638 static void cik_program_aspm(struct radeon_device *rdev)
9639 {
9640 	u32 data, orig;
9641 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9642 	bool disable_clkreq = false;
9643 
9644 	if (radeon_aspm == 0)
9645 		return;
9646 
9647 	/* XXX double check IGPs */
9648 	if (rdev->flags & RADEON_IS_IGP)
9649 		return;
9650 
9651 	if (!(rdev->flags & RADEON_IS_PCIE))
9652 		return;
9653 
9654 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9655 	data &= ~LC_XMIT_N_FTS_MASK;
9656 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9657 	if (orig != data)
9658 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9659 
9660 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9661 	data |= LC_GO_TO_RECOVERY;
9662 	if (orig != data)
9663 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9664 
9665 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9666 	data |= P_IGNORE_EDB_ERR;
9667 	if (orig != data)
9668 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9669 
9670 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9671 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9672 	data |= LC_PMI_TO_L1_DIS;
9673 	if (!disable_l0s)
9674 		data |= LC_L0S_INACTIVITY(7);
9675 
9676 	if (!disable_l1) {
9677 		data |= LC_L1_INACTIVITY(7);
9678 		data &= ~LC_PMI_TO_L1_DIS;
9679 		if (orig != data)
9680 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9681 
9682 		if (!disable_plloff_in_l1) {
9683 			bool clk_req_support;
9684 
9685 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9686 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9687 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9688 			if (orig != data)
9689 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9690 
9691 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9692 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9693 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9694 			if (orig != data)
9695 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9696 
9697 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9698 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9699 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9700 			if (orig != data)
9701 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9702 
9703 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9704 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9705 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9706 			if (orig != data)
9707 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9708 
9709 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9710 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9711 			data |= LC_DYN_LANES_PWR_STATE(3);
9712 			if (orig != data)
9713 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9714 
9715 			if (!disable_clkreq &&
9716 			    !pci_is_root_bus(rdev->pdev->bus)) {
9717 				struct pci_dev *root = rdev->pdev->bus->self;
9718 				u32 lnkcap;
9719 
9720 				clk_req_support = false;
9721 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9722 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9723 					clk_req_support = true;
9724 			} else {
9725 				clk_req_support = false;
9726 			}
9727 
9728 			if (clk_req_support) {
9729 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9730 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9731 				if (orig != data)
9732 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9733 
9734 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9735 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9736 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9737 				if (orig != data)
9738 					WREG32_SMC(THM_CLK_CNTL, data);
9739 
9740 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9741 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9742 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9743 				if (orig != data)
9744 					WREG32_SMC(MISC_CLK_CTRL, data);
9745 
9746 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9747 				data &= ~BCLK_AS_XCLK;
9748 				if (orig != data)
9749 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9750 
9751 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9752 				data &= ~FORCE_BIF_REFCLK_EN;
9753 				if (orig != data)
9754 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9755 
9756 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9757 				data &= ~MPLL_CLKOUT_SEL_MASK;
9758 				data |= MPLL_CLKOUT_SEL(4);
9759 				if (orig != data)
9760 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9761 			}
9762 		}
9763 	} else {
9764 		if (orig != data)
9765 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9766 	}
9767 
9768 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9769 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9770 	if (orig != data)
9771 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9772 
9773 	if (!disable_l0s) {
9774 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9775 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9776 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9777 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9778 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9779 				data &= ~LC_L0S_INACTIVITY_MASK;
9780 				if (orig != data)
9781 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9782 			}
9783 		}
9784 	}
9785 }
9786