/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: where the register value is returned
 *
 * Returns 0 for success or -EINVAL for an invalid register.
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

/*
 * DIDT indirect register accessors
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, reg);
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, reg);
	WREG32(CIK_DIDT_IND_DATA, v);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

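	/* The low 9 bits hold the temperature in degrees C; bit 0x200
	 * appears to flag an out-of-range reading, which is clamped to
	 * the 255 degC maximum below.
	 */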
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

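	/* 0xC0300E0C is assumed to be the SMC thermal status register on
	 * KV/KB parts; per the decode below, the raw value is in 1/8 degC
	 * steps with a -49 degC offset.
	 */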
	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * PCIe port indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

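/*
 * RLC save/restore register lists.  Each entry packs a GRBM_GFX_INDEX
 * selector into the upper 16 bits and a register dword offset (byte
 * offset >> 2) into the lower 16 bits; most entries are followed by a
 * 0x00000000 placeholder for the saved value.  The bare count words
 * (0x3, 0x5) are assumed to be control markers consumed by the RLC
 * ucode rather than register offsets.
 */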
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

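/*
 * "Golden" register tables.  Entries are {offset, and_mask, or_value}
 * triplets consumed by radeon_program_register_sequence() (see
 * radeon_device.c): when and_mask is 0xffffffff the value is written
 * directly, otherwise the register is read, the and_mask bits are
 * cleared, and or_value is OR'd in.
 */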
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

1767 #define BONAIRE_IO_MC_REGS_SIZE 36
1768 
1769 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1770 {
1771 	{0x00000070, 0x04400000},
1772 	{0x00000071, 0x80c01803},
1773 	{0x00000072, 0x00004004},
1774 	{0x00000073, 0x00000100},
1775 	{0x00000074, 0x00ff0000},
1776 	{0x00000075, 0x34000000},
1777 	{0x00000076, 0x08000014},
1778 	{0x00000077, 0x00cc08ec},
1779 	{0x00000078, 0x00000400},
1780 	{0x00000079, 0x00000000},
1781 	{0x0000007a, 0x04090000},
1782 	{0x0000007c, 0x00000000},
1783 	{0x0000007e, 0x4408a8e8},
1784 	{0x0000007f, 0x00000304},
1785 	{0x00000080, 0x00000000},
1786 	{0x00000082, 0x00000001},
1787 	{0x00000083, 0x00000002},
1788 	{0x00000084, 0xf3e4f400},
1789 	{0x00000085, 0x052024e3},
1790 	{0x00000087, 0x00000000},
1791 	{0x00000088, 0x01000000},
1792 	{0x0000008a, 0x1c0a0000},
1793 	{0x0000008b, 0xff010000},
1794 	{0x0000008d, 0xffffefff},
1795 	{0x0000008e, 0xfff3efff},
1796 	{0x0000008f, 0xfff3efbf},
1797 	{0x00000092, 0xf7ffffff},
1798 	{0x00000093, 0xffffff7f},
1799 	{0x00000095, 0x00101101},
1800 	{0x00000096, 0x00000fff},
1801 	{0x00000097, 0x00116fff},
1802 	{0x00000098, 0x60010000},
1803 	{0x00000099, 0x10010000},
1804 	{0x0000009a, 0x00006000},
1805 	{0x0000009b, 0x00001000},
1806 	{0x0000009f, 0x00b48000}
1807 };
1808 
1809 #define HAWAII_IO_MC_REGS_SIZE 22
1810 
1811 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1812 {
1813 	{0x0000007d, 0x40000000},
1814 	{0x0000007e, 0x40180304},
1815 	{0x0000007f, 0x0000ff00},
1816 	{0x00000081, 0x00000000},
1817 	{0x00000083, 0x00000800},
1818 	{0x00000086, 0x00000000},
1819 	{0x00000087, 0x00000100},
1820 	{0x00000088, 0x00020100},
1821 	{0x00000089, 0x00000000},
1822 	{0x0000008b, 0x00040000},
1823 	{0x0000008c, 0x00000100},
1824 	{0x0000008e, 0xff010000},
1825 	{0x00000090, 0xffffefff},
1826 	{0x00000091, 0xfff3efff},
1827 	{0x00000092, 0xfff3efbf},
1828 	{0x00000093, 0xf7ffffff},
1829 	{0x00000094, 0xffffff7f},
1830 	{0x00000095, 0x00000fff},
1831 	{0x00000096, 0x00116fff},
1832 	{0x00000097, 0x60010000},
1833 	{0x00000098, 0x10010000},
1834 	{0x0000009f, 0x00c79000}
1835 };
1836 
1837 
1838 /**
1839  * cik_srbm_select - select specific register instances
1840  *
1841  * @rdev: radeon_device pointer
1842  * @me: selected ME (micro engine)
1843  * @pipe: pipe
1844  * @queue: queue
1845  * @vmid: VMID
1846  *
1847  * Switches the currently active register instances.  Some
1848  * registers are instanced per VMID, others are instanced per
1849  * me/pipe/queue combination.
1850  */
1851 static void cik_srbm_select(struct radeon_device *rdev,
1852 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1853 {
1854 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1855 			     MEID(me & 0x3) |
1856 			     VMID(vmid & 0xf) |
1857 			     QUEUEID(queue & 0x7));
1858 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1859 }
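
/*
 * SRBM_GFX_CNTL packs four small indices into a single dword.  The same
 * composition, written out with explicit shifts (the shift values are
 * assumed to mirror the PIPEID/MEID/VMID/QUEUEID macros in cikd.h):
 *
 *	static u32 srbm_pack(u32 me, u32 pipe, u32 queue, u32 vmid)
 *	{
 *		return ((pipe  & 0x3) << 0) |	// PIPEID
 *		       ((me    & 0x3) << 2) |	// MEID
 *		       ((vmid  & 0xf) << 4) |	// VMID
 *		       ((queue & 0x7) << 8);	// QUEUEID
 *	}
 */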
1860 
1861 /* ucode loading */
1862 /**
1863  * ci_mc_load_microcode - load MC ucode into the hw
1864  *
1865  * @rdev: radeon_device pointer
1866  *
1867  * Load the GDDR MC ucode into the hw (CIK).
1868  * Returns 0 on success, error on failure.
1869  */
1870 int ci_mc_load_microcode(struct radeon_device *rdev)
1871 {
1872 	const __be32 *fw_data = NULL;
1873 	const __le32 *new_fw_data = NULL;
1874 	u32 running, tmp;
1875 	u32 *io_mc_regs = NULL;
1876 	const __le32 *new_io_mc_regs = NULL;
1877 	int i, regs_size, ucode_size;
1878 
1879 	if (!rdev->mc_fw)
1880 		return -EINVAL;
1881 
1882 	if (rdev->new_fw) {
1883 		const struct mc_firmware_header_v1_0 *hdr =
1884 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1885 
1886 		radeon_ucode_print_mc_hdr(&hdr->header);
1887 
1888 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1889 		new_io_mc_regs = (const __le32 *)
1890 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1891 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1892 		new_fw_data = (const __le32 *)
1893 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1894 	} else {
1895 		ucode_size = rdev->mc_fw->size / 4;
1896 
1897 		switch (rdev->family) {
1898 		case CHIP_BONAIRE:
1899 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1900 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1901 			break;
1902 		case CHIP_HAWAII:
1903 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1904 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1905 			break;
1906 		default:
1907 			return -EINVAL;
1908 		}
1909 		fw_data = (const __be32 *)rdev->mc_fw->data;
1910 	}
1911 
1912 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1913 
1914 	if (running == 0) {
1915 		/* reset the engine and set to writable */
1916 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1917 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1918 
1919 		/* load mc io regs */
1920 		for (i = 0; i < regs_size; i++) {
1921 			if (rdev->new_fw) {
1922 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1923 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1924 			} else {
1925 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1926 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1927 			}
1928 		}
1929 
1930 		tmp = RREG32(MC_SEQ_MISC0);
1931 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1932 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1933 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1934 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1935 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1936 		}
1937 
1938 		/* load the MC ucode */
1939 		for (i = 0; i < ucode_size; i++) {
1940 			if (rdev->new_fw)
1941 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1942 			else
1943 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1944 		}
1945 
1946 		/* put the engine back into the active state */
1947 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1948 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1949 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1950 
1951 		/* wait for training to complete */
1952 		for (i = 0; i < rdev->usec_timeout; i++) {
1953 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1954 				break;
1955 			udelay(1);
1956 		}
1957 		for (i = 0; i < rdev->usec_timeout; i++) {
1958 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1959 				break;
1960 			udelay(1);
1961 		}
1962 	}
1963 
1964 	return 0;
1965 }
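
/*
 * The io-register load in ci_mc_load_microcode() is the classic
 * index/data pair idiom: write the index register, then the data
 * register.  A self-contained sketch against a fake register file
 * (all names here are illustrative):
 *
 *	static u32 fake_regs[256];
 *	static u32 fake_index;
 *
 *	static void wreg(int is_data, u32 v)
 *	{
 *		if (!is_data)
 *			fake_index = v;			// ..._INDEX stand-in
 *		else
 *			fake_regs[fake_index & 0xff] = v;	// ..._DATA stand-in
 *	}
 *
 *	static void load_table(const u32 (*tbl)[2], int n)
 *	{
 *		int i;
 *
 *		for (i = 0; i < n; i++) {
 *			wreg(0, tbl[i][0]);	// select the register
 *			wreg(1, tbl[i][1]);	// program its value
 *		}
 *	}
 */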
1966 
1967 /**
1968  * cik_init_microcode - load ucode images from disk
1969  *
1970  * @rdev: radeon_device pointer
1971  *
1972  * Use the firmware interface to load the ucode images into
1973  * the driver (not loaded into hw).
1974  * Returns 0 on success, error on failure.
1975  */
1976 static int cik_init_microcode(struct radeon_device *rdev)
1977 {
1978 	const char *chip_name;
1979 	const char *new_chip_name;
1980 	size_t pfp_req_size, me_req_size, ce_req_size,
1981 		mec_req_size, rlc_req_size, mc_req_size = 0,
1982 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1983 	char fw_name[30];
1984 	int new_fw = 0;
1985 	int err;
1986 	int num_fw;
1987 	bool new_smc = false;
1988 
1989 	DRM_DEBUG("\n");
1990 
1991 	switch (rdev->family) {
1992 	case CHIP_BONAIRE:
1993 		chip_name = "BONAIRE";
1994 		if ((rdev->pdev->revision == 0x80) ||
1995 		    (rdev->pdev->revision == 0x81) ||
1996 		    (rdev->pdev->device == 0x665f))
1997 			new_smc = true;
1998 		new_chip_name = "bonaire";
1999 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2000 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2001 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2002 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2003 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2004 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2005 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2006 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2007 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2008 		num_fw = 8;
2009 		break;
2010 	case CHIP_HAWAII:
2011 		chip_name = "HAWAII";
2012 		if (rdev->pdev->revision == 0x80)
2013 			new_smc = true;
2014 		new_chip_name = "hawaii";
2015 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2016 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2017 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2018 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2019 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2020 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2021 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2022 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2023 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2024 		num_fw = 8;
2025 		break;
2026 	case CHIP_KAVERI:
2027 		chip_name = "KAVERI";
2028 		new_chip_name = "kaveri";
2029 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2030 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2031 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2032 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2033 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2034 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2035 		num_fw = 7;
2036 		break;
2037 	case CHIP_KABINI:
2038 		chip_name = "KABINI";
2039 		new_chip_name = "kabini";
2040 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2041 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2042 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2043 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2044 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2045 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2046 		num_fw = 6;
2047 		break;
2048 	case CHIP_MULLINS:
2049 		chip_name = "MULLINS";
2050 		new_chip_name = "mullins";
2051 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2052 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2053 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2054 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2055 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2056 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2057 		num_fw = 6;
2058 		break;
2059 	default: BUG();
2060 	}
2061 
2062 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2063 
2064 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2065 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2066 	if (err) {
2067 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2068 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069 		if (err)
2070 			goto out;
2071 		if (rdev->pfp_fw->size != pfp_req_size) {
2072 			printk(KERN_ERR
2073 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2074 			       rdev->pfp_fw->size, fw_name);
2075 			err = -EINVAL;
2076 			goto out;
2077 		}
2078 	} else {
2079 		err = radeon_ucode_validate(rdev->pfp_fw);
2080 		if (err) {
2081 			printk(KERN_ERR
2082 			       "cik_fw: validation failed for firmware \"%s\"\n",
2083 			       fw_name);
2084 			goto out;
2085 		} else {
2086 			new_fw++;
2087 		}
2088 	}
2089 
2090 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2091 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092 	if (err) {
2093 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2094 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2095 		if (err)
2096 			goto out;
2097 		if (rdev->me_fw->size != me_req_size) {
2098 			printk(KERN_ERR
2099 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100 			       rdev->me_fw->size, fw_name);
2101 			err = -EINVAL;
2102 		}
2103 	} else {
2104 		err = radeon_ucode_validate(rdev->me_fw);
2105 		if (err) {
2106 			printk(KERN_ERR
2107 			       "cik_fw: validation failed for firmware \"%s\"\n",
2108 			       fw_name);
2109 			goto out;
2110 		} else {
2111 			new_fw++;
2112 		}
2113 	}
2114 
2115 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2116 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2117 	if (err) {
2118 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2119 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2120 		if (err)
2121 			goto out;
2122 		if (rdev->ce_fw->size != ce_req_size) {
2123 			printk(KERN_ERR
2124 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2125 			       rdev->ce_fw->size, fw_name);
2126 			err = -EINVAL;
2127 		}
2128 	} else {
2129 		err = radeon_ucode_validate(rdev->ce_fw);
2130 		if (err) {
2131 			printk(KERN_ERR
2132 			       "cik_fw: validation failed for firmware \"%s\"\n",
2133 			       fw_name);
2134 			goto out;
2135 		} else {
2136 			new_fw++;
2137 		}
2138 	}
2139 
2140 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2141 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142 	if (err) {
2143 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2144 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2145 		if (err)
2146 			goto out;
2147 		if (rdev->mec_fw->size != mec_req_size) {
2148 			printk(KERN_ERR
2149 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2150 			       rdev->mec_fw->size, fw_name);
2151 			err = -EINVAL;
2152 		}
2153 	} else {
2154 		err = radeon_ucode_validate(rdev->mec_fw);
2155 		if (err) {
2156 			printk(KERN_ERR
2157 			       "cik_fw: validation failed for firmware \"%s\"\n",
2158 			       fw_name);
2159 			goto out;
2160 		} else {
2161 			new_fw++;
2162 		}
2163 	}
2164 
2165 	if (rdev->family == CHIP_KAVERI) {
2166 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2167 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2168 		if (err) {
2169 			goto out;
2170 		} else {
2171 			err = radeon_ucode_validate(rdev->mec2_fw);
2172 			if (err) {
2173 				goto out;
2174 			} else {
2175 				new_fw++;
2176 			}
2177 		}
2178 	}
2179 
2180 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2181 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2182 	if (err) {
2183 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2184 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2185 		if (err)
2186 			goto out;
2187 		if (rdev->rlc_fw->size != rlc_req_size) {
2188 			printk(KERN_ERR
2189 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2190 			       rdev->rlc_fw->size, fw_name);
2191 			err = -EINVAL;
2192 		}
2193 	} else {
2194 		err = radeon_ucode_validate(rdev->rlc_fw);
2195 		if (err) {
2196 			printk(KERN_ERR
2197 			       "cik_fw: validation failed for firmware \"%s\"\n",
2198 			       fw_name);
2199 			goto out;
2200 		} else {
2201 			new_fw++;
2202 		}
2203 	}
2204 
2205 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2206 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2207 	if (err) {
2208 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2209 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2210 		if (err)
2211 			goto out;
2212 		if (rdev->sdma_fw->size != sdma_req_size) {
2213 			printk(KERN_ERR
2214 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2215 			       rdev->sdma_fw->size, fw_name);
2216 			err = -EINVAL;
2217 		}
2218 	} else {
2219 		err = radeon_ucode_validate(rdev->sdma_fw);
2220 		if (err) {
2221 			printk(KERN_ERR
2222 			       "cik_fw: validation failed for firmware \"%s\"\n",
2223 			       fw_name);
2224 			goto out;
2225 		} else {
2226 			new_fw++;
2227 		}
2228 	}
2229 
2230 	/* No SMC, MC ucode on APUs */
2231 	if (!(rdev->flags & RADEON_IS_IGP)) {
2232 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2233 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2234 		if (err) {
2235 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2236 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2237 			if (err) {
2238 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2239 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2240 				if (err)
2241 					goto out;
2242 			}
2243 			if ((rdev->mc_fw->size != mc_req_size) &&
2244 			    (rdev->mc_fw->size != mc2_req_size)) {
2245 				printk(KERN_ERR
2246 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2247 				       rdev->mc_fw->size, fw_name);
2248 				err = -EINVAL;
2249 			}
2250 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2251 		} else {
2252 			err = radeon_ucode_validate(rdev->mc_fw);
2253 			if (err) {
2254 				printk(KERN_ERR
2255 				       "cik_fw: validation failed for firmware \"%s\"\n",
2256 				       fw_name);
2257 				goto out;
2258 			} else {
2259 				new_fw++;
2260 			}
2261 		}
2262 
2263 		if (new_smc)
2264 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2265 		else
2266 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2267 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2268 		if (err) {
2269 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2270 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2271 			if (err) {
2272 				printk(KERN_ERR
2273 				       "smc: error loading firmware \"%s\"\n",
2274 				       fw_name);
2275 				release_firmware(rdev->smc_fw);
2276 				rdev->smc_fw = NULL;
2277 				err = 0;
2278 			} else if (rdev->smc_fw->size != smc_req_size) {
2279 				printk(KERN_ERR
2280 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2281 				       rdev->smc_fw->size, fw_name);
2282 				err = -EINVAL;
2283 			}
2284 		} else {
2285 			err = radeon_ucode_validate(rdev->smc_fw);
2286 			if (err) {
2287 				printk(KERN_ERR
2288 				       "cik_fw: validation failed for firmware \"%s\"\n",
2289 				       fw_name);
2290 				goto out;
2291 			} else {
2292 				new_fw++;
2293 			}
2294 		}
2295 	}
2296 
2297 	if (new_fw == 0) {
2298 		rdev->new_fw = false;
2299 	} else if (new_fw < num_fw) {
2300 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2301 		err = -EINVAL;
2302 	} else {
2303 		rdev->new_fw = true;
2304 	}
2305 
2306 out:
2307 	if (err) {
2308 		if (err != -EINVAL)
2309 			printk(KERN_ERR
2310 			       "cik_cp: Failed to load firmware \"%s\"\n",
2311 			       fw_name);
2312 		release_firmware(rdev->pfp_fw);
2313 		rdev->pfp_fw = NULL;
2314 		release_firmware(rdev->me_fw);
2315 		rdev->me_fw = NULL;
2316 		release_firmware(rdev->ce_fw);
2317 		rdev->ce_fw = NULL;
2318 		release_firmware(rdev->mec_fw);
2319 		rdev->mec_fw = NULL;
2320 		release_firmware(rdev->mec2_fw);
2321 		rdev->mec2_fw = NULL;
2322 		release_firmware(rdev->rlc_fw);
2323 		rdev->rlc_fw = NULL;
2324 		release_firmware(rdev->sdma_fw);
2325 		rdev->sdma_fw = NULL;
2326 		release_firmware(rdev->mc_fw);
2327 		rdev->mc_fw = NULL;
2328 		release_firmware(rdev->smc_fw);
2329 		rdev->smc_fw = NULL;
2330 	}
2331 	return err;
2332 }
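
/*
 * cik_init_microcode() always tries the new lower-case firmware name
 * first and only falls back to the legacy upper-case name (validated by
 * size rather than by header).  The same two-step lookup, sketched with
 * plain file I/O (paths illustrative):
 *
 *	static FILE *open_fw(const char *new_name, const char *old_name)
 *	{
 *		FILE *f = fopen(new_name, "rb");   // e.g. radeon/bonaire_pfp.bin
 *
 *		if (!f)
 *			f = fopen(old_name, "rb"); // e.g. radeon/BONAIRE_pfp.bin
 *		return f;	// NULL if neither generation is present
 *	}
 *
 * Note the final accounting: new_fw must end up as 0 (all legacy) or
 * equal to num_fw (every image new-style); anything in between is
 * rejected as a mixed install.
 */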
2333 
2334 /*
2335  * Core functions
2336  */
2337 /**
2338  * cik_tiling_mode_table_init - init the hw tiling table
2339  *
2340  * @rdev: radeon_device pointer
2341  *
2342  * Starting with SI, the tiling setup is done globally in a
2343  * set of 32 tiling modes.  Rather than selecting each set of
2344  * parameters per surface as on older asics, we just select
2345  * which index in the tiling table we want to use, and the
2346  * surface uses those parameters (CIK).
2347  */
2348 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2349 {
2350 	u32 *tile = rdev->config.cik.tile_mode_array;
2351 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2352 	const u32 num_tile_mode_states =
2353 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2354 	const u32 num_secondary_tile_mode_states =
2355 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2356 	u32 reg_offset, split_equal_to_row_size;
2357 	u32 num_pipe_configs;
2358 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2359 		rdev->config.cik.max_shader_engines;
2360 
2361 	switch (rdev->config.cik.mem_row_size_in_kb) {
2362 	case 1:
2363 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2364 		break;
2365 	case 2:
2366 	default:
2367 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2368 		break;
2369 	case 4:
2370 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2371 		break;
2372 	}
2373 
2374 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2375 	if (num_pipe_configs > 8)
2376 		num_pipe_configs = 16;
2377 
2378 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2379 		tile[reg_offset] = 0;
2380 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2381 		macrotile[reg_offset] = 0;
2382 
2383 	switch (num_pipe_configs) {
2384 	case 16:
2385 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2389 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2393 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2401 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2403 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 			   TILE_SPLIT(split_equal_to_row_size));
2405 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2410 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2412 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2413 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2414 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 			   TILE_SPLIT(split_equal_to_row_size));
2416 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2417 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2418 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2421 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2431 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2436 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2451 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2458 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2460 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 
2464 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467 			   NUM_BANKS(ADDR_SURF_16_BANK));
2468 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471 			   NUM_BANKS(ADDR_SURF_16_BANK));
2472 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475 			   NUM_BANKS(ADDR_SURF_16_BANK));
2476 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479 			   NUM_BANKS(ADDR_SURF_16_BANK));
2480 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 			   NUM_BANKS(ADDR_SURF_8_BANK));
2484 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487 			   NUM_BANKS(ADDR_SURF_4_BANK));
2488 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491 			   NUM_BANKS(ADDR_SURF_2_BANK));
2492 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2494 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495 			   NUM_BANKS(ADDR_SURF_16_BANK));
2496 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2498 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499 			   NUM_BANKS(ADDR_SURF_16_BANK));
2500 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503 			    NUM_BANKS(ADDR_SURF_16_BANK));
2504 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507 			    NUM_BANKS(ADDR_SURF_8_BANK));
2508 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511 			    NUM_BANKS(ADDR_SURF_4_BANK));
2512 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515 			    NUM_BANKS(ADDR_SURF_2_BANK));
2516 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519 			    NUM_BANKS(ADDR_SURF_2_BANK));
2520 
2521 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2522 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2523 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2524 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2525 		break;
2526 
2527 	case 8:
2528 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2532 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2536 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2544 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 			   TILE_SPLIT(split_equal_to_row_size));
2548 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2551 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2552 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2555 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2556 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558 			   TILE_SPLIT(split_equal_to_row_size));
2559 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2560 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2561 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2564 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2579 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2589 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2594 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2599 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2601 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2603 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606 
2607 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610 				NUM_BANKS(ADDR_SURF_16_BANK));
2611 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2613 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614 				NUM_BANKS(ADDR_SURF_16_BANK));
2615 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618 				NUM_BANKS(ADDR_SURF_16_BANK));
2619 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622 				NUM_BANKS(ADDR_SURF_16_BANK));
2623 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626 				NUM_BANKS(ADDR_SURF_8_BANK));
2627 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630 				NUM_BANKS(ADDR_SURF_4_BANK));
2631 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634 				NUM_BANKS(ADDR_SURF_2_BANK));
2635 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2637 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638 				NUM_BANKS(ADDR_SURF_16_BANK));
2639 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642 				NUM_BANKS(ADDR_SURF_16_BANK));
2643 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646 				NUM_BANKS(ADDR_SURF_16_BANK));
2647 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2650 				NUM_BANKS(ADDR_SURF_16_BANK));
2651 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654 				NUM_BANKS(ADDR_SURF_8_BANK));
2655 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658 				NUM_BANKS(ADDR_SURF_4_BANK));
2659 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662 				NUM_BANKS(ADDR_SURF_2_BANK));
2663 
2664 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2665 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2666 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2667 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2668 		break;
2669 
2670 	case 4:
2671 		if (num_rbs == 4) {
2672 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2676 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2680 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2688 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691 			   TILE_SPLIT(split_equal_to_row_size));
2692 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2695 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2699 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2700 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2701 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702 			   TILE_SPLIT(split_equal_to_row_size));
2703 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2704 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2705 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2708 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2723 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2733 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2738 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2739 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2743 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2745 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2747 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750 
2751 		} else if (num_rbs < 4) {
2752 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2756 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2760 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2768 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771 			   TILE_SPLIT(split_equal_to_row_size));
2772 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2775 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2776 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2779 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2780 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2781 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782 			   TILE_SPLIT(split_equal_to_row_size));
2783 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2784 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2785 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2788 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2803 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2813 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2818 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2823 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2827 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2828 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2829 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830 		}
2831 
2832 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 				NUM_BANKS(ADDR_SURF_16_BANK));
2836 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839 				NUM_BANKS(ADDR_SURF_16_BANK));
2840 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843 				NUM_BANKS(ADDR_SURF_16_BANK));
2844 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847 				NUM_BANKS(ADDR_SURF_16_BANK));
2848 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851 				NUM_BANKS(ADDR_SURF_16_BANK));
2852 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855 				NUM_BANKS(ADDR_SURF_8_BANK));
2856 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859 				NUM_BANKS(ADDR_SURF_4_BANK));
2860 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2862 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863 				NUM_BANKS(ADDR_SURF_16_BANK));
2864 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2865 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867 				NUM_BANKS(ADDR_SURF_16_BANK));
2868 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2870 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871 				NUM_BANKS(ADDR_SURF_16_BANK));
2872 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2874 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875 				NUM_BANKS(ADDR_SURF_16_BANK));
2876 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879 				NUM_BANKS(ADDR_SURF_16_BANK));
2880 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883 				NUM_BANKS(ADDR_SURF_8_BANK));
2884 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2887 				NUM_BANKS(ADDR_SURF_4_BANK));
2888 
2889 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2890 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2891 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2892 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2893 		break;
2894 
2895 	case 2:
2896 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898 			   PIPE_CONFIG(ADDR_SURF_P2) |
2899 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2900 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902 			   PIPE_CONFIG(ADDR_SURF_P2) |
2903 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2904 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 			   PIPE_CONFIG(ADDR_SURF_P2) |
2907 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910 			   PIPE_CONFIG(ADDR_SURF_P2) |
2911 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2912 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914 			   PIPE_CONFIG(ADDR_SURF_P2) |
2915 			   TILE_SPLIT(split_equal_to_row_size));
2916 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917 			   PIPE_CONFIG(ADDR_SURF_P2) |
2918 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921 			   PIPE_CONFIG(ADDR_SURF_P2) |
2922 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2923 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2925 			   PIPE_CONFIG(ADDR_SURF_P2) |
2926 			   TILE_SPLIT(split_equal_to_row_size));
2927 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2928 			   PIPE_CONFIG(ADDR_SURF_P2));
2929 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2931 			   PIPE_CONFIG(ADDR_SURF_P2));
2932 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934 			    PIPE_CONFIG(ADDR_SURF_P2) |
2935 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938 			    PIPE_CONFIG(ADDR_SURF_P2) |
2939 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942 			    PIPE_CONFIG(ADDR_SURF_P2) |
2943 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945 			    PIPE_CONFIG(ADDR_SURF_P2) |
2946 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2947 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2948 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949 			    PIPE_CONFIG(ADDR_SURF_P2) |
2950 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953 			    PIPE_CONFIG(ADDR_SURF_P2) |
2954 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957 			    PIPE_CONFIG(ADDR_SURF_P2) |
2958 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2960 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2961 			    PIPE_CONFIG(ADDR_SURF_P2));
2962 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2963 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964 			    PIPE_CONFIG(ADDR_SURF_P2) |
2965 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968 			    PIPE_CONFIG(ADDR_SURF_P2) |
2969 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972 			    PIPE_CONFIG(ADDR_SURF_P2) |
2973 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974 
2975 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2977 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978 				NUM_BANKS(ADDR_SURF_16_BANK));
2979 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2980 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982 				NUM_BANKS(ADDR_SURF_16_BANK));
2983 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2985 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986 				NUM_BANKS(ADDR_SURF_16_BANK));
2987 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990 				NUM_BANKS(ADDR_SURF_16_BANK));
2991 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994 				NUM_BANKS(ADDR_SURF_16_BANK));
2995 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998 				NUM_BANKS(ADDR_SURF_16_BANK));
2999 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002 				NUM_BANKS(ADDR_SURF_8_BANK));
3003 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3004 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3005 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006 				NUM_BANKS(ADDR_SURF_16_BANK));
3007 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3008 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010 				NUM_BANKS(ADDR_SURF_16_BANK));
3011 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3012 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3013 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014 				NUM_BANKS(ADDR_SURF_16_BANK));
3015 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3016 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018 				NUM_BANKS(ADDR_SURF_16_BANK));
3019 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3021 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022 				NUM_BANKS(ADDR_SURF_16_BANK));
3023 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026 				NUM_BANKS(ADDR_SURF_16_BANK));
3027 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3030 				NUM_BANKS(ADDR_SURF_8_BANK));
3031 
3032 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3033 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3034 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3035 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3036 		break;
3037 
3038 	default:
3039 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3040 	}
3041 }
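
/*
 * Every tile table entry above is just OR-ed bit fields.  Composing one
 * entry by hand looks like this (the EX_* shifts are placeholders
 * standing in for the ARRAY_MODE()/PIPE_CONFIG()/TILE_SPLIT()/
 * MICRO_TILE_MODE_NEW() macros in cikd.h):
 *
 *	#define EX_ARRAY_MODE(x)	((x) << 2)
 *	#define EX_PIPE_CONFIG(x)	((x) << 6)
 *	#define EX_TILE_SPLIT(x)	((x) << 11)
 *	#define EX_MICRO_TILE_MODE(x)	((x) << 22)
 *
 *	u32 mode = EX_ARRAY_MODE(array_mode) |
 *		   EX_PIPE_CONFIG(pipe_config) |
 *		   EX_TILE_SPLIT(tile_split) |
 *		   EX_MICRO_TILE_MODE(micro_mode);
 */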
3042 
3043 /**
3044  * cik_select_se_sh - select which SE, SH to address
3045  *
3046  * @rdev: radeon_device pointer
3047  * @se_num: shader engine to address
3048  * @sh_num: sh block to address
3049  *
3050  * Select which SE, SH combinations to address. Certain
3051  * registers are instanced per SE or SH.  0xffffffff means
3052  * broadcast to all SEs or SHs (CIK).
3053  */
3054 static void cik_select_se_sh(struct radeon_device *rdev,
3055 			     u32 se_num, u32 sh_num)
3056 {
3057 	u32 data = INSTANCE_BROADCAST_WRITES;
3058 
3059 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3060 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3061 	else if (se_num == 0xffffffff)
3062 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3063 	else if (sh_num == 0xffffffff)
3064 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3065 	else
3066 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3067 	WREG32(GRBM_GFX_INDEX, data);
3068 }
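
/*
 * The 0xffffffff sentinel requests broadcast.  The same encode step,
 * with assumed bit positions standing in for the GRBM_GFX_INDEX field
 * macros (illustrative, not the register definition):
 *
 *	#define EX_BROADCAST	0xffffffff
 *
 *	static u32 grbm_index(u32 se, u32 sh)
 *	{
 *		u32 data = 1u << 30;			// instance broadcast
 *
 *		if (se == EX_BROADCAST)
 *			data |= 1u << 31;		// SE broadcast
 *		else
 *			data |= (se & 0xff) << 16;	// SE index
 *		if (sh == EX_BROADCAST)
 *			data |= 1u << 29;		// SH broadcast
 *		else
 *			data |= (sh & 0xff) << 8;	// SH index
 *		return data;
 *	}
 */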
3069 
3070 /**
3071  * cik_create_bitmask - create a bitmask
3072  *
3073  * @bit_width: length of the mask
3074  *
3075  * Creates a variable length bit mask (CIK).
3076  * Returns the bitmask.
3077  */
3078 static u32 cik_create_bitmask(u32 bit_width)
3079 {
3080 	u32 i, mask = 0;
3081 
3082 	for (i = 0; i < bit_width; i++) {
3083 		mask <<= 1;
3084 		mask |= 1;
3085 	}
3086 	return mask;
3087 }
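
/*
 * For bit_width < 32 the loop above is equivalent to the closed form
 * (1u << bit_width) - 1; at exactly 32 the loop still yields ~0u, where
 * the plain shift would be undefined behaviour:
 *
 *	static u32 bitmask(u32 width)
 *	{
 *		return width >= 32 ? ~0u : (1u << width) - 1;
 *	}
 *
 *	// bitmask(4) == 0xf, matching cik_create_bitmask(4)
 */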
3088 
3089 /**
3090  * cik_get_rb_disabled - computes the mask of disabled RBs
3091  *
3092  * @rdev: radeon_device pointer
3093  * @max_rb_num_per_se: max RBs (render backends) per SE (shader
3094  *                     engine) for the asic
3095  * @sh_per_se: number of SH blocks per SE for the asic
3096  *
3097  * Calculates the bitmask of disabled RBs (CIK).
3098  * Returns the disabled RB bitmask.
3099  */
3100 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3101 			      u32 max_rb_num_per_se,
3102 			      u32 sh_per_se)
3103 {
3104 	u32 data, mask;
3105 
3106 	data = RREG32(CC_RB_BACKEND_DISABLE);
3107 	if (data & 1)
3108 		data &= BACKEND_DISABLE_MASK;
3109 	else
3110 		data = 0;
3111 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3112 
3113 	data >>= BACKEND_DISABLE_SHIFT;
3114 
3115 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3116 
3117 	return data & mask;
3118 }
3119 
3120 /**
3121  * cik_setup_rb - setup the RBs on the asic
3122  *
3123  * @rdev: radeon_device pointer
3124  * @se_num: number of SEs (shader engines) for the asic
3125  * @sh_per_se: number of SH blocks per SE for the asic
3126  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3127  *
3128  * Configures per-SE/SH RB registers (CIK).
3129  */
3130 static void cik_setup_rb(struct radeon_device *rdev,
3131 			 u32 se_num, u32 sh_per_se,
3132 			 u32 max_rb_num_per_se)
3133 {
3134 	int i, j;
3135 	u32 data, mask;
3136 	u32 disabled_rbs = 0;
3137 	u32 enabled_rbs = 0;
3138 
3139 	mutex_lock(&rdev->grbm_idx_mutex);
3140 	for (i = 0; i < se_num; i++) {
3141 		for (j = 0; j < sh_per_se; j++) {
3142 			cik_select_se_sh(rdev, i, j);
3143 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3144 			if (rdev->family == CHIP_HAWAII)
3145 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3146 			else
3147 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3148 		}
3149 	}
3150 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3151 	mutex_unlock(&rdev->grbm_idx_mutex);
3152 
3153 	mask = 1;
3154 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3155 		if (!(disabled_rbs & mask))
3156 			enabled_rbs |= mask;
3157 		mask <<= 1;
3158 	}
3159 
3160 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3161 
3162 	mutex_lock(&rdev->grbm_idx_mutex);
3163 	for (i = 0; i < se_num; i++) {
3164 		cik_select_se_sh(rdev, i, 0xffffffff);
3165 		data = 0;
3166 		for (j = 0; j < sh_per_se; j++) {
3167 			switch (enabled_rbs & 3) {
3168 			case 0:
3169 				if (j == 0)
3170 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3171 				else
3172 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3173 				break;
3174 			case 1:
3175 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3176 				break;
3177 			case 2:
3178 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3179 				break;
3180 			case 3:
3181 			default:
3182 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3183 				break;
3184 			}
3185 			enabled_rbs >>= 2;
3186 		}
3187 		WREG32(PA_SC_RASTER_CONFIG, data);
3188 	}
3189 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3190 	mutex_unlock(&rdev->grbm_idx_mutex);
3191 }
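
/*
 * Worked example: on Bonaire (2 SEs, 1 SH/SE, 2 RBs/SE) the enable loop
 * above scans 2 * 2 = 4 RB bits; with no RBs fused off, disabled_rbs is
 * 0 and backend_enable_mask ends up as 0xf.
 */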
3192 
3193 /**
3194  * cik_gpu_init - setup the 3D engine
3195  *
3196  * @rdev: radeon_device pointer
3197  *
3198  * Configures the 3D engine and tiling configuration
3199  * registers so that the 3D engine is usable.
3200  */
3201 static void cik_gpu_init(struct radeon_device *rdev)
3202 {
3203 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3204 	u32 mc_shared_chmap, mc_arb_ramcfg;
3205 	u32 hdp_host_path_cntl;
3206 	u32 tmp;
3207 	int i, j;
3208 
3209 	switch (rdev->family) {
3210 	case CHIP_BONAIRE:
3211 		rdev->config.cik.max_shader_engines = 2;
3212 		rdev->config.cik.max_tile_pipes = 4;
3213 		rdev->config.cik.max_cu_per_sh = 7;
3214 		rdev->config.cik.max_sh_per_se = 1;
3215 		rdev->config.cik.max_backends_per_se = 2;
3216 		rdev->config.cik.max_texture_channel_caches = 4;
3217 		rdev->config.cik.max_gprs = 256;
3218 		rdev->config.cik.max_gs_threads = 32;
3219 		rdev->config.cik.max_hw_contexts = 8;
3220 
3221 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3222 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3223 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3224 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3225 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3226 		break;
3227 	case CHIP_HAWAII:
3228 		rdev->config.cik.max_shader_engines = 4;
3229 		rdev->config.cik.max_tile_pipes = 16;
3230 		rdev->config.cik.max_cu_per_sh = 11;
3231 		rdev->config.cik.max_sh_per_se = 1;
3232 		rdev->config.cik.max_backends_per_se = 4;
3233 		rdev->config.cik.max_texture_channel_caches = 16;
3234 		rdev->config.cik.max_gprs = 256;
3235 		rdev->config.cik.max_gs_threads = 32;
3236 		rdev->config.cik.max_hw_contexts = 8;
3237 
3238 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3239 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3240 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3241 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3242 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3243 		break;
3244 	case CHIP_KAVERI:
3245 		rdev->config.cik.max_shader_engines = 1;
3246 		rdev->config.cik.max_tile_pipes = 4;
3247 		if ((rdev->pdev->device == 0x1304) ||
3248 		    (rdev->pdev->device == 0x1305) ||
3249 		    (rdev->pdev->device == 0x130C) ||
3250 		    (rdev->pdev->device == 0x130F) ||
3251 		    (rdev->pdev->device == 0x1310) ||
3252 		    (rdev->pdev->device == 0x1311) ||
3253 		    (rdev->pdev->device == 0x131C)) {
3254 			rdev->config.cik.max_cu_per_sh = 8;
3255 			rdev->config.cik.max_backends_per_se = 2;
3256 		} else if ((rdev->pdev->device == 0x1309) ||
3257 			   (rdev->pdev->device == 0x130A) ||
3258 			   (rdev->pdev->device == 0x130D) ||
3259 			   (rdev->pdev->device == 0x1313) ||
3260 			   (rdev->pdev->device == 0x131D)) {
3261 			rdev->config.cik.max_cu_per_sh = 6;
3262 			rdev->config.cik.max_backends_per_se = 2;
3263 		} else if ((rdev->pdev->device == 0x1306) ||
3264 			   (rdev->pdev->device == 0x1307) ||
3265 			   (rdev->pdev->device == 0x130B) ||
3266 			   (rdev->pdev->device == 0x130E) ||
3267 			   (rdev->pdev->device == 0x1315) ||
3268 			   (rdev->pdev->device == 0x1318) ||
3269 			   (rdev->pdev->device == 0x131B)) {
3270 			rdev->config.cik.max_cu_per_sh = 4;
3271 			rdev->config.cik.max_backends_per_se = 1;
3272 		} else {
3273 			rdev->config.cik.max_cu_per_sh = 3;
3274 			rdev->config.cik.max_backends_per_se = 1;
3275 		}
3276 		rdev->config.cik.max_sh_per_se = 1;
3277 		rdev->config.cik.max_texture_channel_caches = 4;
3278 		rdev->config.cik.max_gprs = 256;
3279 		rdev->config.cik.max_gs_threads = 16;
3280 		rdev->config.cik.max_hw_contexts = 8;
3281 
3282 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3283 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3284 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3285 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3286 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3287 		break;
3288 	case CHIP_KABINI:
3289 	case CHIP_MULLINS:
3290 	default:
3291 		rdev->config.cik.max_shader_engines = 1;
3292 		rdev->config.cik.max_tile_pipes = 2;
3293 		rdev->config.cik.max_cu_per_sh = 2;
3294 		rdev->config.cik.max_sh_per_se = 1;
3295 		rdev->config.cik.max_backends_per_se = 1;
3296 		rdev->config.cik.max_texture_channel_caches = 2;
3297 		rdev->config.cik.max_gprs = 256;
3298 		rdev->config.cik.max_gs_threads = 16;
3299 		rdev->config.cik.max_hw_contexts = 8;
3300 
3301 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3302 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3303 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3304 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3305 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3306 		break;
3307 	}
3308 
3309 	/* Initialize HDP */
3310 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3311 		WREG32((0x2c14 + j), 0x00000000);
3312 		WREG32((0x2c18 + j), 0x00000000);
3313 		WREG32((0x2c1c + j), 0x00000000);
3314 		WREG32((0x2c20 + j), 0x00000000);
3315 		WREG32((0x2c24 + j), 0x00000000);
3316 	}
3317 
3318 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3319 	WREG32(SRBM_INT_CNTL, 0x1);
3320 	WREG32(SRBM_INT_ACK, 0x1);
3321 
3322 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3323 
3324 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3325 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3326 
3327 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3328 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3329 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3330 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3331 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3332 		rdev->config.cik.mem_row_size_in_kb = 4;
3333 	/* XXX use MC settings? */
3334 	rdev->config.cik.shader_engine_tile_size = 32;
3335 	rdev->config.cik.num_gpus = 1;
3336 	rdev->config.cik.multi_gpu_tile_size = 64;
3337 
3338 	/* fix up row size */
3339 	gb_addr_config &= ~ROW_SIZE_MASK;
3340 	switch (rdev->config.cik.mem_row_size_in_kb) {
3341 	case 1:
3342 	default:
3343 		gb_addr_config |= ROW_SIZE(0);
3344 		break;
3345 	case 2:
3346 		gb_addr_config |= ROW_SIZE(1);
3347 		break;
3348 	case 4:
3349 		gb_addr_config |= ROW_SIZE(2);
3350 		break;
3351 	}
3352 
3353 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3354 	 * not have bank info, so create a custom tiling dword.
3355 	 * bits 3:0   num_pipes
3356 	 * bits 7:4   num_banks
3357 	 * bits 11:8  group_size
3358 	 * bits 15:12 row_size
3359 	 */
3360 	rdev->config.cik.tile_config = 0;
3361 	switch (rdev->config.cik.num_tile_pipes) {
3362 	case 1:
3363 		rdev->config.cik.tile_config |= (0 << 0);
3364 		break;
3365 	case 2:
3366 		rdev->config.cik.tile_config |= (1 << 0);
3367 		break;
3368 	case 4:
3369 		rdev->config.cik.tile_config |= (2 << 0);
3370 		break;
3371 	case 8:
3372 	default:
3373 		/* XXX what about 12? */
3374 		rdev->config.cik.tile_config |= (3 << 0);
3375 		break;
3376 	}
3377 	rdev->config.cik.tile_config |=
3378 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3379 	rdev->config.cik.tile_config |=
3380 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3381 	rdev->config.cik.tile_config |=
3382 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
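
	/*
	 * A hypothetical consumer can decode the dword assembled above by
	 * following the bit layout documented before the switch:
	 *
	 *	pipes_field = (tile_config >> 0) & 0xf;  // 0=1, 1=2, 2=4, 3=8 pipes
	 *	banks_field = (tile_config >> 4) & 0xf;  // raw NOOFBANK field
	 *	group_field = (tile_config >> 8) & 0xf;  // raw PIPE_INTERLEAVE_SIZE field
	 *	row_field   = (tile_config >> 12) & 0xf; // raw ROW_SIZE field
	 */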
3383 
3384 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3385 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3386 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3387 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3388 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3389 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3390 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3391 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3392 
3393 	cik_tiling_mode_table_init(rdev);
3394 
3395 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3396 		     rdev->config.cik.max_sh_per_se,
3397 		     rdev->config.cik.max_backends_per_se);
3398 
3399 	rdev->config.cik.active_cus = 0;
3400 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3401 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3402 			rdev->config.cik.active_cus +=
3403 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3404 		}
3405 	}
3406 
3407 	/* set HW defaults for 3D engine */
3408 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3409 
3410 	mutex_lock(&rdev->grbm_idx_mutex);
3411 	/*
3412 	 * making sure that the following register writes are broadcast
3413 	 * to all the shaders
3414 	 */
3415 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3416 	WREG32(SX_DEBUG_1, 0x20);
3417 
3418 	WREG32(TA_CNTL_AUX, 0x00010000);
3419 
3420 	tmp = RREG32(SPI_CONFIG_CNTL);
3421 	tmp |= 0x03000000;
3422 	WREG32(SPI_CONFIG_CNTL, tmp);
3423 
3424 	WREG32(SQ_CONFIG, 1);
3425 
3426 	WREG32(DB_DEBUG, 0);
3427 
3428 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3429 	tmp |= 0x00000400;
3430 	WREG32(DB_DEBUG2, tmp);
3431 
3432 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3433 	tmp |= 0x00020200;
3434 	WREG32(DB_DEBUG3, tmp);
3435 
3436 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3437 	tmp |= 0x00018208;
3438 	WREG32(CB_HW_CONTROL, tmp);
3439 
3440 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3441 
3442 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3443 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3444 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3445 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3446 
3447 	WREG32(VGT_NUM_INSTANCES, 1);
3448 
3449 	WREG32(CP_PERFMON_CNTL, 0);
3450 
3451 	WREG32(SQ_CONFIG, 0);
3452 
3453 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3454 					  FORCE_EOV_MAX_REZ_CNT(255)));
3455 
3456 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3457 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3458 
3459 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3460 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3461 
3462 	tmp = RREG32(HDP_MISC_CNTL);
3463 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3464 	WREG32(HDP_MISC_CNTL, tmp);
3465 
3466 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3467 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3468 
3469 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3470 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3471 	mutex_unlock(&rdev->grbm_idx_mutex);
3472 
3473 	udelay(50);
3474 }
3475 
3476 /*
3477  * GPU scratch registers helpers function.
3478  */
3479 /**
3480  * cik_scratch_init - setup driver info for CP scratch regs
3481  *
3482  * @rdev: radeon_device pointer
3483  *
3484  * Set up the number and offset of the CP scratch registers.
3485  * NOTE: use of CP scratch registers is a legacy interface and
3486  * is not used by default on newer asics (r6xx+).  On newer asics,
3487  * memory buffers are used for fences rather than scratch regs.
3488  */
3489 static void cik_scratch_init(struct radeon_device *rdev)
3490 {
3491 	int i;
3492 
3493 	rdev->scratch.num_reg = 7;
3494 	rdev->scratch.reg_base = SCRATCH_REG0;
3495 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3496 		rdev->scratch.free[i] = true;
3497 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3498 	}
3499 }
3500 
3501 /**
3502  * cik_ring_test - basic gfx ring test
3503  *
3504  * @rdev: radeon_device pointer
3505  * @ring: radeon_ring structure holding ring information
3506  *
3507  * Allocate a scratch register and write to it using the gfx ring (CIK).
3508  * Provides a basic gfx ring test to verify that the ring is working.
3509  * Used by cik_cp_gfx_resume().
3510  * Returns 0 on success, error on failure.
3511  */
3512 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3513 {
3514 	uint32_t scratch;
3515 	uint32_t tmp = 0;
3516 	unsigned i;
3517 	int r;
3518 
3519 	r = radeon_scratch_get(rdev, &scratch);
3520 	if (r) {
3521 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3522 		return r;
3523 	}
3524 	WREG32(scratch, 0xCAFEDEAD);
3525 	r = radeon_ring_lock(rdev, ring, 3);
3526 	if (r) {
3527 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3528 		radeon_scratch_free(rdev, scratch);
3529 		return r;
3530 	}
3531 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3532 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3533 	radeon_ring_write(ring, 0xDEADBEEF);
3534 	radeon_ring_unlock_commit(rdev, ring, false);
3535 
3536 	for (i = 0; i < rdev->usec_timeout; i++) {
3537 		tmp = RREG32(scratch);
3538 		if (tmp == 0xDEADBEEF)
3539 			break;
3540 		DRM_UDELAY(1);
3541 	}
3542 	if (i < rdev->usec_timeout) {
3543 		DRM_INFO("ring test on %d succeeded in %u usecs\n", ring->idx, i);
3544 	} else {
3545 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3546 			  ring->idx, scratch, tmp);
3547 		r = -EINVAL;
3548 	}
3549 	radeon_scratch_free(rdev, scratch);
3550 	return r;
3551 }
3552 
3553 /**
3554  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3555  *
3556  * @rdev: radeon_device pointer
3557  * @ridx: radeon ring index
3558  *
3559  * Emits an hdp flush on the cp.
3560  */
3561 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3562 				       int ridx)
3563 {
3564 	struct radeon_ring *ring = &rdev->ring[ridx];
3565 	u32 ref_and_mask;
3566 
3567 	switch (ring->idx) {
3568 	case CAYMAN_RING_TYPE_CP1_INDEX:
3569 	case CAYMAN_RING_TYPE_CP2_INDEX:
3570 	default:
3571 		switch (ring->me) {
3572 		case 0:
3573 			ref_and_mask = CP2 << ring->pipe;
3574 			break;
3575 		case 1:
3576 			ref_and_mask = CP6 << ring->pipe;
3577 			break;
3578 		default:
3579 			return;
3580 		}
3581 		break;
3582 	case RADEON_RING_TYPE_GFX_INDEX:
3583 		ref_and_mask = CP0;
3584 		break;
3585 	}
3586 
3587 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3588 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3589 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3590 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3591 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3592 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3593 	radeon_ring_write(ring, ref_and_mask);
3594 	radeon_ring_write(ring, ref_and_mask);
3595 	radeon_ring_write(ring, 0x20); /* poll interval */
3596 }
3597 
3598 /**
3599  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3600  *
3601  * @rdev: radeon_device pointer
3602  * @fence: radeon fence object
3603  *
3604  * Emits a fence sequence number on the gfx ring and flushes
3605  * GPU caches.
3606  */
3607 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3608 			     struct radeon_fence *fence)
3609 {
3610 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3611 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3612 
3613 	/* Workaround for cache flush problems. First send a dummy EOP
3614 	 * event down the pipe with a seq number one below the real one.
3615 	 */
3616 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3617 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3618 				 EOP_TC_ACTION_EN |
3619 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3620 				 EVENT_INDEX(5)));
3621 	radeon_ring_write(ring, addr & 0xfffffffc);
3622 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3623 				DATA_SEL(1) | INT_SEL(0));
3624 	radeon_ring_write(ring, fence->seq - 1);
3625 	radeon_ring_write(ring, 0);
3626 
3627 	/* Then send the real EOP event down the pipe. */
3628 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3629 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3630 				 EOP_TC_ACTION_EN |
3631 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3632 				 EVENT_INDEX(5)));
3633 	radeon_ring_write(ring, addr & 0xfffffffc);
3634 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3635 	radeon_ring_write(ring, fence->seq);
3636 	radeon_ring_write(ring, 0);
3637 }
3638 
3639 /**
3640  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3641  *
3642  * @rdev: radeon_device pointer
3643  * @fence: radeon fence object
3644  *
3645  * Emits a fence sequence number on the compute ring and flushes
3646  * GPU caches.
3647  */
3648 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3649 				 struct radeon_fence *fence)
3650 {
3651 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3652 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3653 
3654 	/* RELEASE_MEM - flush caches, send int */
3655 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3656 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3657 				 EOP_TC_ACTION_EN |
3658 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3659 				 EVENT_INDEX(5)));
3660 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3661 	radeon_ring_write(ring, addr & 0xfffffffc);
3662 	radeon_ring_write(ring, upper_32_bits(addr));
3663 	radeon_ring_write(ring, fence->seq);
3664 	radeon_ring_write(ring, 0);
3665 }
3666 
3667 /**
3668  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3669  *
3670  * @rdev: radeon_device pointer
3671  * @ring: radeon ring buffer object
3672  * @semaphore: radeon semaphore object
3673  * @emit_wait: Is this a semaphore wait?
3674  *
3675  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3676  * from running ahead of semaphore waits.
3677  */
3678 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3679 			     struct radeon_ring *ring,
3680 			     struct radeon_semaphore *semaphore,
3681 			     bool emit_wait)
3682 {
3683 	uint64_t addr = semaphore->gpu_addr;
3684 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3685 
3686 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3687 	radeon_ring_write(ring, lower_32_bits(addr));
3688 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3689 
3690 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3691 		/* Prevent the PFP from running ahead of the semaphore wait */
3692 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3693 		radeon_ring_write(ring, 0x0);
3694 	}
3695 
3696 	return true;
3697 }
3698 
3699 /**
3700  * cik_copy_cpdma - copy pages using the CP DMA engine
3701  *
3702  * @rdev: radeon_device pointer
3703  * @src_offset: src GPU address
3704  * @dst_offset: dst GPU address
3705  * @num_gpu_pages: number of GPU pages to xfer
3706  * @resv: reservation object to sync to
3707  *
3708  * Copy GPU pages using the CP DMA engine (CIK+).
3709  * Used by the radeon ttm implementation to move pages if
3710  * registered as the asic copy callback.
3711  */
3712 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3713 				    uint64_t src_offset, uint64_t dst_offset,
3714 				    unsigned num_gpu_pages,
3715 				    struct reservation_object *resv)
3716 {
3717 	struct radeon_fence *fence;
3718 	struct radeon_sync sync;
3719 	int ring_index = rdev->asic->copy.blit_ring_index;
3720 	struct radeon_ring *ring = &rdev->ring[ring_index];
3721 	u32 size_in_bytes, cur_size_in_bytes, control;
3722 	int i, num_loops;
3723 	int r = 0;
3724 
3725 	radeon_sync_create(&sync);
3726 
3727 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3728 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
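	/*
	 * Each DMA_DATA packet moves at most 0x1fffff bytes (one byte short
	 * of 2 MiB).  Worked example, assuming the usual 4 KiB GPU page:
	 * 1024 pages is 4 MiB, so num_loops = DIV_ROUND_UP(4194304, 2097151) = 3.
	 */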
3729 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3730 	if (r) {
3731 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3732 		radeon_sync_free(rdev, &sync, NULL);
3733 		return ERR_PTR(r);
3734 	}
3735 
3736 	radeon_sync_resv(rdev, &sync, resv, false);
3737 	radeon_sync_rings(rdev, &sync, ring->idx);
3738 
3739 	for (i = 0; i < num_loops; i++) {
3740 		cur_size_in_bytes = size_in_bytes;
3741 		if (cur_size_in_bytes > 0x1fffff)
3742 			cur_size_in_bytes = 0x1fffff;
3743 		size_in_bytes -= cur_size_in_bytes;
3744 		control = 0;
3745 		if (size_in_bytes == 0)
3746 			control |= PACKET3_DMA_DATA_CP_SYNC;
3747 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3748 		radeon_ring_write(ring, control);
3749 		radeon_ring_write(ring, lower_32_bits(src_offset));
3750 		radeon_ring_write(ring, upper_32_bits(src_offset));
3751 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3752 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3753 		radeon_ring_write(ring, cur_size_in_bytes);
3754 		src_offset += cur_size_in_bytes;
3755 		dst_offset += cur_size_in_bytes;
3756 	}
3757 
3758 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3759 	if (r) {
3760 		radeon_ring_unlock_undo(rdev, ring);
3761 		radeon_sync_free(rdev, &sync, NULL);
3762 		return ERR_PTR(r);
3763 	}
3764 
3765 	radeon_ring_unlock_commit(rdev, ring, false);
3766 	radeon_sync_free(rdev, &sync, fence);
3767 
3768 	return fence;
3769 }
3770 
3771 /*
3772  * IB stuff
3773  */
3774 /**
3775  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3776  *
3777  * @rdev: radeon_device pointer
3778  * @ib: radeon indirect buffer object
3779  *
3780  * Emits a DE (drawing engine) or CE (constant engine) IB
3781  * on the gfx ring.  IBs are usually generated by userspace
3782  * acceleration drivers and submitted to the kernel for
3783  * scheduling on the ring.  This function schedules the IB
3784  * on the gfx ring for execution by the GPU.
3785  */
3786 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3787 {
3788 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3789 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3790 	u32 header, control = INDIRECT_BUFFER_VALID;
3791 
3792 	if (ib->is_const_ib) {
3793 		/* set switch buffer packet before const IB */
3794 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3795 		radeon_ring_write(ring, 0);
3796 
3797 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3798 	} else {
3799 		u32 next_rptr;
3800 		if (ring->rptr_save_reg) {
3801 			next_rptr = ring->wptr + 3 + 4;
3802 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3803 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3804 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3805 			radeon_ring_write(ring, next_rptr);
3806 		} else if (rdev->wb.enabled) {
3807 			next_rptr = ring->wptr + 5 + 4;
3808 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3809 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3810 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3811 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3812 			radeon_ring_write(ring, next_rptr);
3813 		}
3814 
3815 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3816 	}
3817 
3818 	control |= ib->length_dw | (vm_id << 24);
3819 
3820 	radeon_ring_write(ring, header);
3821 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3822 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3823 	radeon_ring_write(ring, control);
3824 }
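
/*
 * The INDIRECT_BUFFER packet emitted above is four dwords: the header,
 * IB address bits 31:2, IB address bits 47:32, and a control word that
 * packs ib->length_dw with the VM id shifted left by 24 and
 * INDIRECT_BUFFER_VALID.
 */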
3825 
3826 /**
3827  * cik_ib_test - basic gfx ring IB test
3828  *
3829  * @rdev: radeon_device pointer
3830  * @ring: radeon_ring structure holding ring information
3831  *
3832  * Allocate an IB and execute it on the gfx ring (CIK).
3833  * Provides a basic gfx ring test to verify that IBs are working.
3834  * Returns 0 on success, error on failure.
3835  */
3836 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3837 {
3838 	struct radeon_ib ib;
3839 	uint32_t scratch;
3840 	uint32_t tmp = 0;
3841 	unsigned i;
3842 	int r;
3843 
3844 	r = radeon_scratch_get(rdev, &scratch);
3845 	if (r) {
3846 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3847 		return r;
3848 	}
3849 	WREG32(scratch, 0xCAFEDEAD);
3850 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3851 	if (r) {
3852 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3853 		radeon_scratch_free(rdev, scratch);
3854 		return r;
3855 	}
3856 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3857 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3858 	ib.ptr[2] = 0xDEADBEEF;
3859 	ib.length_dw = 3;
3860 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3861 	if (r) {
3862 		radeon_scratch_free(rdev, scratch);
3863 		radeon_ib_free(rdev, &ib);
3864 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3865 		return r;
3866 	}
3867 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3868 		RADEON_USEC_IB_TEST_TIMEOUT));
3869 	if (r < 0) {
3870 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3871 		radeon_scratch_free(rdev, scratch);
3872 		radeon_ib_free(rdev, &ib);
3873 		return r;
3874 	} else if (r == 0) {
3875 		DRM_ERROR("radeon: fence wait timed out.\n");
3876 		radeon_scratch_free(rdev, scratch);
3877 		radeon_ib_free(rdev, &ib);
3878 		return -ETIMEDOUT;
3879 	}
3880 	r = 0;
3881 	for (i = 0; i < rdev->usec_timeout; i++) {
3882 		tmp = RREG32(scratch);
3883 		if (tmp == 0xDEADBEEF)
3884 			break;
3885 		DRM_UDELAY(1);
3886 	}
3887 	if (i < rdev->usec_timeout) {
3888 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3889 	} else {
3890 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3891 			  scratch, tmp);
3892 		r = -EINVAL;
3893 	}
3894 	radeon_scratch_free(rdev, scratch);
3895 	radeon_ib_free(rdev, &ib);
3896 	return r;
3897 }
3898 
3899 /*
3900  * CP.
3901  * On CIK, gfx and compute now have independent command processors.
3902  *
3903  * GFX
3904  * Gfx consists of a single ring and can process both gfx jobs and
3905  * compute jobs.  The gfx CP consists of three microengines (ME):
3906  * PFP - Pre-Fetch Parser
3907  * ME - Micro Engine
3908  * CE - Constant Engine
3909  * The PFP and ME make up what is considered the Drawing Engine (DE).
3910  * The CE is an asynchronous engine used for updating buffer descriptors
3911  * used by the DE so that they can be loaded into cache in parallel
3912  * while the DE is processing state update packets.
3913  *
3914  * Compute
3915  * The compute CP consists of two microengines (ME):
3916  * MEC1 - Compute MicroEngine 1
3917  * MEC2 - Compute MicroEngine 2
3918  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3919  * The queues are exposed to userspace and are programmed directly
3920  * by the compute runtime.
3921  */
3922 /**
3923  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3924  *
3925  * @rdev: radeon_device pointer
3926  * @enable: enable or disable the MEs
3927  *
3928  * Halts or unhalts the gfx MEs.
3929  */
3930 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3931 {
3932 	if (enable)
3933 		WREG32(CP_ME_CNTL, 0);
3934 	else {
3935 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3936 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3937 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3938 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3939 	}
3940 	udelay(50);
3941 }
3942 
3943 /**
3944  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3945  *
3946  * @rdev: radeon_device pointer
3947  *
3948  * Loads the gfx PFP, ME, and CE ucode.
3949  * Returns 0 for success, -EINVAL if the ucode is not available.
3950  */
3951 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3952 {
3953 	int i;
3954 
3955 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3956 		return -EINVAL;
3957 
3958 	cik_cp_gfx_enable(rdev, false);
3959 
3960 	if (rdev->new_fw) {
3961 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3962 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3963 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3964 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3965 		const struct gfx_firmware_header_v1_0 *me_hdr =
3966 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3967 		const __le32 *fw_data;
3968 		u32 fw_size;
3969 
3970 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3971 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3972 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3973 
3974 		/* PFP */
3975 		fw_data = (const __le32 *)
3976 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3977 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3978 		WREG32(CP_PFP_UCODE_ADDR, 0);
3979 		for (i = 0; i < fw_size; i++)
3980 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3981 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3982 
3983 		/* CE */
3984 		fw_data = (const __le32 *)
3985 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3986 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3987 		WREG32(CP_CE_UCODE_ADDR, 0);
3988 		for (i = 0; i < fw_size; i++)
3989 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3990 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3991 
3992 		/* ME */
3993 		fw_data = (const __le32 *)
3994 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3995 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3996 		WREG32(CP_ME_RAM_WADDR, 0);
3997 		for (i = 0; i < fw_size; i++)
3998 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3999 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4000 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4001 	} else {
4002 		const __be32 *fw_data;
4003 
4004 		/* PFP */
4005 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4006 		WREG32(CP_PFP_UCODE_ADDR, 0);
4007 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4008 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4009 		WREG32(CP_PFP_UCODE_ADDR, 0);
4010 
4011 		/* CE */
4012 		fw_data = (const __be32 *)rdev->ce_fw->data;
4013 		WREG32(CP_CE_UCODE_ADDR, 0);
4014 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4015 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4016 		WREG32(CP_CE_UCODE_ADDR, 0);
4017 
4018 		/* ME */
4019 		fw_data = (const __be32 *)rdev->me_fw->data;
4020 		WREG32(CP_ME_RAM_WADDR, 0);
4021 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4022 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4023 		WREG32(CP_ME_RAM_WADDR, 0);
4024 	}
4025 
4026 	return 0;
4027 }
4028 
4029 /**
4030  * cik_cp_gfx_start - start the gfx ring
4031  *
4032  * @rdev: radeon_device pointer
4033  *
4034  * Enables the ring and loads the clear state context and other
4035  * packets required to init the ring.
4036  * Returns 0 for success, error for failure.
4037  */
4038 static int cik_cp_gfx_start(struct radeon_device *rdev)
4039 {
4040 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4041 	int r, i;
4042 
4043 	/* init the CP */
4044 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4045 	WREG32(CP_ENDIAN_SWAP, 0);
4046 	WREG32(CP_DEVICE_ID, 1);
4047 
4048 	cik_cp_gfx_enable(rdev, true);
4049 
4050 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4051 	if (r) {
4052 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4053 		return r;
4054 	}
4055 
4056 	/* init the CE partitions.  CE only used for gfx on CIK */
4057 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4058 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4059 	radeon_ring_write(ring, 0x8000);
4060 	radeon_ring_write(ring, 0x8000);
4061 
4062 	/* setup clear context state */
4063 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4064 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4065 
4066 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4067 	radeon_ring_write(ring, 0x80000000);
4068 	radeon_ring_write(ring, 0x80000000);
4069 
4070 	for (i = 0; i < cik_default_size; i++)
4071 		radeon_ring_write(ring, cik_default_state[i]);
4072 
4073 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4074 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4075 
4076 	/* set clear context state */
4077 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4078 	radeon_ring_write(ring, 0);
4079 
4080 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4081 	radeon_ring_write(ring, 0x00000316);
4082 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4083 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4084 
4085 	radeon_ring_unlock_commit(rdev, ring, false);
4086 
4087 	return 0;
4088 }
4089 
4090 /**
4091  * cik_cp_gfx_fini - stop the gfx ring
4092  *
4093  * @rdev: radeon_device pointer
4094  *
4095  * Stop the gfx ring and tear down the driver ring
4096  * info.
4097  */
4098 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4099 {
4100 	cik_cp_gfx_enable(rdev, false);
4101 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102 }
4103 
4104 /**
4105  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4106  *
4107  * @rdev: radeon_device pointer
4108  *
4109  * Program the location and size of the gfx ring buffer
4110  * and test it to make sure it's working.
4111  * Returns 0 for success, error for failure.
4112  */
4113 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4114 {
4115 	struct radeon_ring *ring;
4116 	u32 tmp;
4117 	u32 rb_bufsz;
4118 	u64 rb_addr;
4119 	int r;
4120 
4121 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4122 	if (rdev->family != CHIP_HAWAII)
4123 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4124 
4125 	/* Set the write pointer delay */
4126 	WREG32(CP_RB_WPTR_DELAY, 0);
4127 
4128 	/* set the RB to use vmid 0 */
4129 	WREG32(CP_RB_VMID, 0);
4130 
4131 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4132 
4133 	/* ring 0 - compute and gfx */
4134 	/* Set ring buffer size */
4135 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4136 	rb_bufsz = order_base_2(ring->ring_size / 8);
4137 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
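	/*
	 * Worked example, assuming a 1 MiB ring and 4 KiB GPU pages:
	 * rb_bufsz = order_base_2(1048576 / 8) = 17, and the field shifted
	 * into bits 8 and up is order_base_2(4096 / 8) = 9.
	 */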
4138 #ifdef __BIG_ENDIAN
4139 	tmp |= BUF_SWAP_32BIT;
4140 #endif
4141 	WREG32(CP_RB0_CNTL, tmp);
4142 
4143 	/* Initialize the ring buffer's read and write pointers */
4144 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4145 	ring->wptr = 0;
4146 	WREG32(CP_RB0_WPTR, ring->wptr);
4147 
4148 	/* set the wb address whether it's enabled or not */
4149 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4150 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4151 
4152 	/* scratch register shadowing is no longer supported */
4153 	WREG32(SCRATCH_UMSK, 0);
4154 
4155 	if (!rdev->wb.enabled)
4156 		tmp |= RB_NO_UPDATE;
4157 
4158 	mdelay(1);
4159 	WREG32(CP_RB0_CNTL, tmp);
4160 
4161 	rb_addr = ring->gpu_addr >> 8;
4162 	WREG32(CP_RB0_BASE, rb_addr);
4163 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4164 
4165 	/* start the ring */
4166 	cik_cp_gfx_start(rdev);
4167 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4168 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4169 	if (r) {
4170 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4171 		return r;
4172 	}
4173 
4174 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4175 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4176 
4177 	return 0;
4178 }
4179 
4180 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4181 		     struct radeon_ring *ring)
4182 {
4183 	u32 rptr;
4184 
4185 	if (rdev->wb.enabled)
4186 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4187 	else
4188 		rptr = RREG32(CP_RB0_RPTR);
4189 
4190 	return rptr;
4191 }
4192 
4193 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4194 		     struct radeon_ring *ring)
4195 {
4196 	return RREG32(CP_RB0_WPTR);
4197 }
4198 
4199 void cik_gfx_set_wptr(struct radeon_device *rdev,
4200 		      struct radeon_ring *ring)
4201 {
4202 	WREG32(CP_RB0_WPTR, ring->wptr);
4203 	(void)RREG32(CP_RB0_WPTR);
4204 }
4205 
4206 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4207 			 struct radeon_ring *ring)
4208 {
4209 	u32 rptr;
4210 
4211 	if (rdev->wb.enabled) {
4212 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4213 	} else {
4214 		mutex_lock(&rdev->srbm_mutex);
4215 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4216 		rptr = RREG32(CP_HQD_PQ_RPTR);
4217 		cik_srbm_select(rdev, 0, 0, 0, 0);
4218 		mutex_unlock(&rdev->srbm_mutex);
4219 	}
4220 
4221 	return rptr;
4222 }
4223 
4224 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4225 			 struct radeon_ring *ring)
4226 {
4227 	u32 wptr;
4228 
4229 	if (rdev->wb.enabled) {
4230 		/* XXX check if swapping is necessary on BE */
4231 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4232 	} else {
4233 		mutex_lock(&rdev->srbm_mutex);
4234 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4235 		wptr = RREG32(CP_HQD_PQ_WPTR);
4236 		cik_srbm_select(rdev, 0, 0, 0, 0);
4237 		mutex_unlock(&rdev->srbm_mutex);
4238 	}
4239 
4240 	return wptr;
4241 }
4242 
4243 void cik_compute_set_wptr(struct radeon_device *rdev,
4244 			  struct radeon_ring *ring)
4245 {
4246 	/* XXX check if swapping is necessary on BE */
4247 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4248 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4249 }
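
/*
 * Unlike the gfx ring, compute queues publish their write pointer via a
 * doorbell: the value is mirrored into the writeback buffer (the same
 * slot cik_compute_get_wptr() reads) and then the doorbell at
 * ring->doorbell_index is rung to notify the CP.
 */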
4250 
4251 static void cik_compute_stop(struct radeon_device *rdev,
4252 			     struct radeon_ring *ring)
4253 {
4254 	u32 j, tmp;
4255 
4256 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4257 	/* Disable wptr polling. */
4258 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4259 	tmp &= ~WPTR_POLL_EN;
4260 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4261 	/* Disable HQD. */
4262 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4263 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4264 		for (j = 0; j < rdev->usec_timeout; j++) {
4265 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4266 				break;
4267 			udelay(1);
4268 		}
4269 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4270 		WREG32(CP_HQD_PQ_RPTR, 0);
4271 		WREG32(CP_HQD_PQ_WPTR, 0);
4272 	}
4273 	cik_srbm_select(rdev, 0, 0, 0, 0);
4274 }
4275 
4276 /**
4277  * cik_cp_compute_enable - enable/disable the compute CP MEs
4278  *
4279  * @rdev: radeon_device pointer
4280  * @enable: enable or disable the MEs
4281  *
4282  * Halts or unhalts the compute MEs.
4283  */
4284 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4285 {
4286 	if (enable)
4287 		WREG32(CP_MEC_CNTL, 0);
4288 	else {
4289 		/*
4290 		 * To make hibernation reliable we need to clear compute ring
4291 		 * configuration before halting the compute ring.
4292 		 */
4293 		mutex_lock(&rdev->srbm_mutex);
4294 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4295 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4296 		mutex_unlock(&rdev->srbm_mutex);
4297 
4298 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4299 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4300 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4301 	}
4302 	udelay(50);
4303 }
4304 
4305 /**
4306  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4307  *
4308  * @rdev: radeon_device pointer
4309  *
4310  * Loads the compute MEC1&2 ucode.
4311  * Returns 0 for success, -EINVAL if the ucode is not available.
4312  */
4313 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4314 {
4315 	int i;
4316 
4317 	if (!rdev->mec_fw)
4318 		return -EINVAL;
4319 
4320 	cik_cp_compute_enable(rdev, false);
4321 
4322 	if (rdev->new_fw) {
4323 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4324 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4325 		const __le32 *fw_data;
4326 		u32 fw_size;
4327 
4328 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4329 
4330 		/* MEC1 */
4331 		fw_data = (const __le32 *)
4332 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4333 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4334 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4335 		for (i = 0; i < fw_size; i++)
4336 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4337 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4338 
4339 		/* MEC2 */
4340 		if (rdev->family == CHIP_KAVERI) {
4341 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4342 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4343 
4344 			fw_data = (const __le32 *)
4345 				(rdev->mec2_fw->data +
4346 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4347 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4348 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4349 			for (i = 0; i < fw_size; i++)
4350 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4351 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4352 		}
4353 	} else {
4354 		const __be32 *fw_data;
4355 
4356 		/* MEC1 */
4357 		fw_data = (const __be32 *)rdev->mec_fw->data;
4358 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4359 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4360 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4361 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4362 
4363 		if (rdev->family == CHIP_KAVERI) {
4364 			/* MEC2 */
4365 			fw_data = (const __be32 *)rdev->mec_fw->data;
4366 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4367 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4368 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4369 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4370 		}
4371 	}
4372 
4373 	return 0;
4374 }
4375 
4376 /**
4377  * cik_cp_compute_start - start the compute queues
4378  *
4379  * @rdev: radeon_device pointer
4380  *
4381  * Enable the compute queues.
4382  * Returns 0 for success, error for failure.
4383  */
4384 static int cik_cp_compute_start(struct radeon_device *rdev)
4385 {
4386 	cik_cp_compute_enable(rdev, true);
4387 
4388 	return 0;
4389 }
4390 
4391 /**
4392  * cik_cp_compute_fini - stop the compute queues
4393  *
4394  * @rdev: radeon_device pointer
4395  *
4396  * Stop the compute queues and tear down the driver queue
4397  * info.
4398  */
4399 static void cik_cp_compute_fini(struct radeon_device *rdev)
4400 {
4401 	int i, idx, r;
4402 
4403 	cik_cp_compute_enable(rdev, false);
4404 
4405 	for (i = 0; i < 2; i++) {
4406 		if (i == 0)
4407 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4408 		else
4409 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4410 
4411 		if (rdev->ring[idx].mqd_obj) {
4412 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4413 			if (unlikely(r != 0))
4414 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4415 
4416 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4417 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4418 
4419 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4420 			rdev->ring[idx].mqd_obj = NULL;
4421 		}
4422 	}
4423 }
4424 
4425 static void cik_mec_fini(struct radeon_device *rdev)
4426 {
4427 	int r;
4428 
4429 	if (rdev->mec.hpd_eop_obj) {
4430 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4431 		if (unlikely(r != 0))
4432 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4433 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4434 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4435 
4436 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4437 		rdev->mec.hpd_eop_obj = NULL;
4438 	}
4439 }
4440 
4441 #define MEC_HPD_SIZE 2048
4442 
4443 static int cik_mec_init(struct radeon_device *rdev)
4444 {
4445 	int r;
4446 	u32 *hpd;
4447 
4448 	/*
4449 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4450 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4451 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4452 	 * be handled by KFD
4453 	 */
4454 	rdev->mec.num_mec = 1;
4455 	rdev->mec.num_pipe = 1;
4456 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4457 
4458 	if (rdev->mec.hpd_eop_obj == NULL) {
4459 		r = radeon_bo_create(rdev,
4460 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4461 				     PAGE_SIZE, true,
4462 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4463 				     &rdev->mec.hpd_eop_obj);
4464 		if (r) {
4465 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4466 			return r;
4467 		}
4468 	}
4469 
4470 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4471 	if (unlikely(r != 0)) {
4472 		cik_mec_fini(rdev);
4473 		return r;
4474 	}
4475 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4476 			  &rdev->mec.hpd_eop_gpu_addr);
4477 	if (r) {
4478 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4479 		cik_mec_fini(rdev);
4480 		return r;
4481 	}
4482 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4483 	if (r) {
4484 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4485 		cik_mec_fini(rdev);
4486 		return r;
4487 	}
4488 
4489 	/* clear memory.  Not sure if this is required or not */
4490 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4491 
4492 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4493 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4494 
4495 	return 0;
4496 }
4497 
4498 struct hqd_registers
4499 {
4500 	u32 cp_mqd_base_addr;
4501 	u32 cp_mqd_base_addr_hi;
4502 	u32 cp_hqd_active;
4503 	u32 cp_hqd_vmid;
4504 	u32 cp_hqd_persistent_state;
4505 	u32 cp_hqd_pipe_priority;
4506 	u32 cp_hqd_queue_priority;
4507 	u32 cp_hqd_quantum;
4508 	u32 cp_hqd_pq_base;
4509 	u32 cp_hqd_pq_base_hi;
4510 	u32 cp_hqd_pq_rptr;
4511 	u32 cp_hqd_pq_rptr_report_addr;
4512 	u32 cp_hqd_pq_rptr_report_addr_hi;
4513 	u32 cp_hqd_pq_wptr_poll_addr;
4514 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4515 	u32 cp_hqd_pq_doorbell_control;
4516 	u32 cp_hqd_pq_wptr;
4517 	u32 cp_hqd_pq_control;
4518 	u32 cp_hqd_ib_base_addr;
4519 	u32 cp_hqd_ib_base_addr_hi;
4520 	u32 cp_hqd_ib_rptr;
4521 	u32 cp_hqd_ib_control;
4522 	u32 cp_hqd_iq_timer;
4523 	u32 cp_hqd_iq_rptr;
4524 	u32 cp_hqd_dequeue_request;
4525 	u32 cp_hqd_dma_offload;
4526 	u32 cp_hqd_sema_cmd;
4527 	u32 cp_hqd_msg_type;
4528 	u32 cp_hqd_atomic0_preop_lo;
4529 	u32 cp_hqd_atomic0_preop_hi;
4530 	u32 cp_hqd_atomic1_preop_lo;
4531 	u32 cp_hqd_atomic1_preop_hi;
4532 	u32 cp_hqd_hq_scheduler0;
4533 	u32 cp_hqd_hq_scheduler1;
4534 	u32 cp_mqd_control;
4535 };
4536 
4537 struct bonaire_mqd
4538 {
4539 	u32 header;
4540 	u32 dispatch_initiator;
4541 	u32 dimensions[3];
4542 	u32 start_idx[3];
4543 	u32 num_threads[3];
4544 	u32 pipeline_stat_enable;
4545 	u32 perf_counter_enable;
4546 	u32 pgm[2];
4547 	u32 tba[2];
4548 	u32 tma[2];
4549 	u32 pgm_rsrc[2];
4550 	u32 vmid;
4551 	u32 resource_limits;
4552 	u32 static_thread_mgmt01[2];
4553 	u32 tmp_ring_size;
4554 	u32 static_thread_mgmt23[2];
4555 	u32 restart[3];
4556 	u32 thread_trace_enable;
4557 	u32 reserved1;
4558 	u32 user_data[16];
4559 	u32 vgtcs_invoke_count[2];
4560 	struct hqd_registers queue_state;
4561 	u32 dequeue_cntr;
4562 	u32 interrupt_queue[64];
4563 };
4564 
4565 /**
4566  * cik_cp_compute_resume - setup the compute queue registers
4567  *
4568  * @rdev: radeon_device pointer
4569  *
4570  * Program the compute queues and test them to make sure they
4571  * are working.
4572  * Returns 0 for success, error for failure.
4573  */
4574 static int cik_cp_compute_resume(struct radeon_device *rdev)
4575 {
4576 	int r, i, j, idx;
4577 	u32 tmp;
4578 	bool use_doorbell = true;
4579 	u64 hqd_gpu_addr;
4580 	u64 mqd_gpu_addr;
4581 	u64 eop_gpu_addr;
4582 	u64 wb_gpu_addr;
4583 	u32 *buf;
4584 	struct bonaire_mqd *mqd;
4585 
4586 	r = cik_cp_compute_start(rdev);
4587 	if (r)
4588 		return r;
4589 
4590 	/* fix up chicken bits */
4591 	tmp = RREG32(CP_CPF_DEBUG);
4592 	tmp |= (1 << 23);
4593 	WREG32(CP_CPF_DEBUG, tmp);
4594 
4595 	/* init the pipes */
4596 	mutex_lock(&rdev->srbm_mutex);
4597 
4598 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4599 
4600 	cik_srbm_select(rdev, 0, 0, 0, 0);
4601 
4602 	/* write the EOP addr */
4603 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4604 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4605 
4606 	/* set the VMID assigned */
4607 	WREG32(CP_HPD_EOP_VMID, 0);
4608 
4609 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4610 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4611 	tmp &= ~EOP_SIZE_MASK;
4612 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4613 	WREG32(CP_HPD_EOP_CONTROL, tmp);
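	/*
	 * Sanity check on the encoding: with MEC_HPD_SIZE = 2048 bytes the
	 * field is order_base_2(2048 / 8) = 8, and 2^(8+1) = 512 dwords =
	 * 2048 bytes, i.e. exactly MEC_HPD_SIZE.
	 */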
4614 
4615 	mutex_unlock(&rdev->srbm_mutex);
4616 
4617 	/* init the queues.  Just two for now. */
4618 	for (i = 0; i < 2; i++) {
4619 		if (i == 0)
4620 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4621 		else
4622 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4623 
4624 		if (rdev->ring[idx].mqd_obj == NULL) {
4625 			r = radeon_bo_create(rdev,
4626 					     sizeof(struct bonaire_mqd),
4627 					     PAGE_SIZE, true,
4628 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4629 					     NULL, &rdev->ring[idx].mqd_obj);
4630 			if (r) {
4631 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4632 				return r;
4633 			}
4634 		}
4635 
4636 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4637 		if (unlikely(r != 0)) {
4638 			cik_cp_compute_fini(rdev);
4639 			return r;
4640 		}
4641 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4642 				  &mqd_gpu_addr);
4643 		if (r) {
4644 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4645 			cik_cp_compute_fini(rdev);
4646 			return r;
4647 		}
4648 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4649 		if (r) {
4650 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4651 			cik_cp_compute_fini(rdev);
4652 			return r;
4653 		}
4654 
4655 		/* init the mqd struct */
4656 		memset(buf, 0, sizeof(struct bonaire_mqd));
4657 
4658 		mqd = (struct bonaire_mqd *)buf;
4659 		mqd->header = 0xC0310800;
4660 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4661 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4662 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4663 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4664 
4665 		mutex_lock(&rdev->srbm_mutex);
4666 		cik_srbm_select(rdev, rdev->ring[idx].me,
4667 				rdev->ring[idx].pipe,
4668 				rdev->ring[idx].queue, 0);
4669 
4670 		/* disable wptr polling */
4671 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4672 		tmp &= ~WPTR_POLL_EN;
4673 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4674 
4675 		/* enable doorbell? */
4676 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4677 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4678 		if (use_doorbell)
4679 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4680 		else
4681 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4682 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4683 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4684 
4685 		/* disable the queue if it's active */
4686 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4687 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4688 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4689 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4690 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4691 			for (j = 0; j < rdev->usec_timeout; j++) {
4692 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4693 					break;
4694 				udelay(1);
4695 			}
4696 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4697 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4698 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4699 		}
4700 
4701 		/* set the pointer to the MQD */
4702 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4703 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4704 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4705 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4706 		/* set MQD vmid to 0 */
4707 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4708 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4709 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4710 
4711 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4712 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4713 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4714 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4715 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4716 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4717 
4718 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4719 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4720 		mqd->queue_state.cp_hqd_pq_control &=
4721 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4722 
4723 		mqd->queue_state.cp_hqd_pq_control |=
4724 			order_base_2(rdev->ring[idx].ring_size / 8);
4725 		mqd->queue_state.cp_hqd_pq_control |=
4726 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4727 #ifdef __BIG_ENDIAN
4728 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4729 #endif
4730 		mqd->queue_state.cp_hqd_pq_control &=
4731 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4732 		mqd->queue_state.cp_hqd_pq_control |=
4733 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4734 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4735 
4736 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4737 		if (i == 0)
4738 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4739 		else
4740 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4741 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4742 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4743 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4744 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4745 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4746 
4747 		/* set the wb address whether it's enabled or not */
4748 		if (i == 0)
4749 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4750 		else
4751 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4752 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4753 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4754 			upper_32_bits(wb_gpu_addr) & 0xffff;
4755 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4756 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4757 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4758 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4759 
4760 		/* enable the doorbell if requested */
4761 		if (use_doorbell) {
4762 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4763 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4764 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4765 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4766 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4767 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4768 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4769 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4770 
4771 		} else {
4772 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4773 		}
4774 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4775 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4776 
4777 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4778 		rdev->ring[idx].wptr = 0;
4779 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4780 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4781 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4782 
4783 		/* set the vmid for the queue */
4784 		mqd->queue_state.cp_hqd_vmid = 0;
4785 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4786 
4787 		/* activate the queue */
4788 		mqd->queue_state.cp_hqd_active = 1;
4789 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4790 
4791 		cik_srbm_select(rdev, 0, 0, 0, 0);
4792 		mutex_unlock(&rdev->srbm_mutex);
4793 
4794 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4795 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4796 
4797 		rdev->ring[idx].ready = true;
4798 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4799 		if (r)
4800 			rdev->ring[idx].ready = false;
4801 	}
4802 
4803 	return 0;
4804 }
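/*
 * The CP_HQD_* programming above follows one read-modify-write idiom,
 * always performed while the target queue is selected through
 * cik_srbm_select(). A minimal sketch of that idiom (hypothetical
 * helper, compiled out; not part of the driver):
 */
#if 0
static void cik_hqd_rmw(struct radeon_device *rdev, u32 reg, u32 clr, u32 set)
{
	u32 tmp = RREG32(reg);	/* read the selected queue's current state */

	tmp &= ~clr;		/* clear the fields being reprogrammed */
	tmp |= set;		/* apply the new field values */
	WREG32(reg, tmp);	/* write back while the queue is still selected */
}
#endif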
4805 
4806 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4807 {
4808 	cik_cp_gfx_enable(rdev, enable);
4809 	cik_cp_compute_enable(rdev, enable);
4810 }
4811 
4812 static int cik_cp_load_microcode(struct radeon_device *rdev)
4813 {
4814 	int r;
4815 
4816 	r = cik_cp_gfx_load_microcode(rdev);
4817 	if (r)
4818 		return r;
4819 	r = cik_cp_compute_load_microcode(rdev);
4820 	if (r)
4821 		return r;
4822 
4823 	return 0;
4824 }
4825 
4826 static void cik_cp_fini(struct radeon_device *rdev)
4827 {
4828 	cik_cp_gfx_fini(rdev);
4829 	cik_cp_compute_fini(rdev);
4830 }
4831 
4832 static int cik_cp_resume(struct radeon_device *rdev)
4833 {
4834 	int r;
4835 
4836 	cik_enable_gui_idle_interrupt(rdev, false);
4837 
4838 	r = cik_cp_load_microcode(rdev);
4839 	if (r)
4840 		return r;
4841 
4842 	r = cik_cp_gfx_resume(rdev);
4843 	if (r)
4844 		return r;
4845 	r = cik_cp_compute_resume(rdev);
4846 	if (r)
4847 		return r;
4848 
4849 	cik_enable_gui_idle_interrupt(rdev, true);
4850 
4851 	return 0;
4852 }
4853 
4854 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4855 {
4856 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4857 		RREG32(GRBM_STATUS));
4858 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4859 		RREG32(GRBM_STATUS2));
4860 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4861 		RREG32(GRBM_STATUS_SE0));
4862 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4863 		RREG32(GRBM_STATUS_SE1));
4864 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4865 		RREG32(GRBM_STATUS_SE2));
4866 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4867 		RREG32(GRBM_STATUS_SE3));
4868 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4869 		RREG32(SRBM_STATUS));
4870 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4871 		RREG32(SRBM_STATUS2));
4872 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4873 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4874 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4875 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4876 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4877 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4878 		 RREG32(CP_STALLED_STAT1));
4879 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4880 		 RREG32(CP_STALLED_STAT2));
4881 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4882 		 RREG32(CP_STALLED_STAT3));
4883 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4884 		 RREG32(CP_CPF_BUSY_STAT));
4885 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4886 		 RREG32(CP_CPF_STALLED_STAT1));
4887 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4888 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4889 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4890 		 RREG32(CP_CPC_STALLED_STAT1));
4891 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4892 }
4893 
4894 /**
4895  * cik_gpu_check_soft_reset - check which blocks are busy
4896  *
4897  * @rdev: radeon_device pointer
4898  *
4899  * Check which blocks are busy and return the relevant reset
4900  * mask to be used by cik_gpu_soft_reset().
4901  * Returns a mask of the blocks to be reset.
4902  */
4903 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4904 {
4905 	u32 reset_mask = 0;
4906 	u32 tmp;
4907 
4908 	/* GRBM_STATUS */
4909 	tmp = RREG32(GRBM_STATUS);
4910 	if (tmp & (PA_BUSY | SC_BUSY |
4911 		   BCI_BUSY | SX_BUSY |
4912 		   TA_BUSY | VGT_BUSY |
4913 		   DB_BUSY | CB_BUSY |
4914 		   GDS_BUSY | SPI_BUSY |
4915 		   IA_BUSY | IA_BUSY_NO_DMA))
4916 		reset_mask |= RADEON_RESET_GFX;
4917 
4918 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4919 		reset_mask |= RADEON_RESET_CP;
4920 
4921 	/* GRBM_STATUS2 */
4922 	tmp = RREG32(GRBM_STATUS2);
4923 	if (tmp & RLC_BUSY)
4924 		reset_mask |= RADEON_RESET_RLC;
4925 
4926 	/* SDMA0_STATUS_REG */
4927 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4928 	if (!(tmp & SDMA_IDLE))
4929 		reset_mask |= RADEON_RESET_DMA;
4930 
4931 	/* SDMA1_STATUS_REG */
4932 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4933 	if (!(tmp & SDMA_IDLE))
4934 		reset_mask |= RADEON_RESET_DMA1;
4935 
4936 	/* SRBM_STATUS2 */
4937 	tmp = RREG32(SRBM_STATUS2);
4938 	if (tmp & SDMA_BUSY)
4939 		reset_mask |= RADEON_RESET_DMA;
4940 
4941 	if (tmp & SDMA1_BUSY)
4942 		reset_mask |= RADEON_RESET_DMA1;
4943 
4944 	/* SRBM_STATUS */
4945 	tmp = RREG32(SRBM_STATUS);
4946 
4947 	if (tmp & IH_BUSY)
4948 		reset_mask |= RADEON_RESET_IH;
4949 
4950 	if (tmp & SEM_BUSY)
4951 		reset_mask |= RADEON_RESET_SEM;
4952 
4953 	if (tmp & GRBM_RQ_PENDING)
4954 		reset_mask |= RADEON_RESET_GRBM;
4955 
4956 	if (tmp & VMC_BUSY)
4957 		reset_mask |= RADEON_RESET_VMC;
4958 
4959 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4960 		   MCC_BUSY | MCD_BUSY))
4961 		reset_mask |= RADEON_RESET_MC;
4962 
4963 	if (evergreen_is_display_hung(rdev))
4964 		reset_mask |= RADEON_RESET_DISPLAY;
4965 
4966 	/* Skip MC reset as it's most likely not hung, just busy */
4967 	if (reset_mask & RADEON_RESET_MC) {
4968 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4969 		reset_mask &= ~RADEON_RESET_MC;
4970 	}
4971 
4972 	return reset_mask;
4973 }
4974 
4975 /**
4976  * cik_gpu_soft_reset - soft reset GPU
4977  *
4978  * @rdev: radeon_device pointer
4979  * @reset_mask: mask of which blocks to reset
4980  *
4981  * Soft reset the blocks specified in @reset_mask.
4982  */
4983 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4984 {
4985 	struct evergreen_mc_save save;
4986 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4987 	u32 tmp;
4988 
4989 	if (reset_mask == 0)
4990 		return;
4991 
4992 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4993 
4994 	cik_print_gpu_status_regs(rdev);
4995 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4996 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4997 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4998 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4999 
5000 	/* disable CG/PG */
5001 	cik_fini_pg(rdev);
5002 	cik_fini_cg(rdev);
5003 
5004 	/* stop the rlc */
5005 	cik_rlc_stop(rdev);
5006 
5007 	/* Disable GFX parsing/prefetching */
5008 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5009 
5010 	/* Disable MEC parsing/prefetching */
5011 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5012 
5013 	if (reset_mask & RADEON_RESET_DMA) {
5014 		/* sdma0 */
5015 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5016 		tmp |= SDMA_HALT;
5017 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5018 	}
5019 	if (reset_mask & RADEON_RESET_DMA1) {
5020 		/* sdma1 */
5021 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5022 		tmp |= SDMA_HALT;
5023 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5024 	}
5025 
5026 	evergreen_mc_stop(rdev, &save);
5027 	if (evergreen_mc_wait_for_idle(rdev)) {
5028 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5029 	}
5030 
5031 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5032 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5033 
5034 	if (reset_mask & RADEON_RESET_CP) {
5035 		grbm_soft_reset |= SOFT_RESET_CP;
5036 
5037 		srbm_soft_reset |= SOFT_RESET_GRBM;
5038 	}
5039 
5040 	if (reset_mask & RADEON_RESET_DMA)
5041 		srbm_soft_reset |= SOFT_RESET_SDMA;
5042 
5043 	if (reset_mask & RADEON_RESET_DMA1)
5044 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5045 
5046 	if (reset_mask & RADEON_RESET_DISPLAY)
5047 		srbm_soft_reset |= SOFT_RESET_DC;
5048 
5049 	if (reset_mask & RADEON_RESET_RLC)
5050 		grbm_soft_reset |= SOFT_RESET_RLC;
5051 
5052 	if (reset_mask & RADEON_RESET_SEM)
5053 		srbm_soft_reset |= SOFT_RESET_SEM;
5054 
5055 	if (reset_mask & RADEON_RESET_IH)
5056 		srbm_soft_reset |= SOFT_RESET_IH;
5057 
5058 	if (reset_mask & RADEON_RESET_GRBM)
5059 		srbm_soft_reset |= SOFT_RESET_GRBM;
5060 
5061 	if (reset_mask & RADEON_RESET_VMC)
5062 		srbm_soft_reset |= SOFT_RESET_VMC;
5063 
5064 	if (!(rdev->flags & RADEON_IS_IGP)) {
5065 		if (reset_mask & RADEON_RESET_MC)
5066 			srbm_soft_reset |= SOFT_RESET_MC;
5067 	}
5068 
5069 	if (grbm_soft_reset) {
5070 		tmp = RREG32(GRBM_SOFT_RESET);
5071 		tmp |= grbm_soft_reset;
5072 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5073 		WREG32(GRBM_SOFT_RESET, tmp);
5074 		tmp = RREG32(GRBM_SOFT_RESET);
5075 
5076 		udelay(50);
5077 
5078 		tmp &= ~grbm_soft_reset;
5079 		WREG32(GRBM_SOFT_RESET, tmp);
5080 		tmp = RREG32(GRBM_SOFT_RESET);
5081 	}
5082 
5083 	if (srbm_soft_reset) {
5084 		tmp = RREG32(SRBM_SOFT_RESET);
5085 		tmp |= srbm_soft_reset;
5086 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5087 		WREG32(SRBM_SOFT_RESET, tmp);
5088 		tmp = RREG32(SRBM_SOFT_RESET);
5089 
5090 		udelay(50);
5091 
5092 		tmp &= ~srbm_soft_reset;
5093 		WREG32(SRBM_SOFT_RESET, tmp);
5094 		tmp = RREG32(SRBM_SOFT_RESET);
5095 	}
5096 
5097 	/* Wait a little for things to settle down */
5098 	udelay(50);
5099 
5100 	evergreen_mc_resume(rdev, &save);
5101 	udelay(50);
5102 
5103 	cik_print_gpu_status_regs(rdev);
5104 }
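/*
 * Both the GRBM and SRBM paths above use the same assert/de-assert
 * sequence: set the reset bits, read back to post the write, wait,
 * clear the bits, read back again. As a sketch (hypothetical helper,
 * compiled out):
 */
#if 0
static void cik_toggle_soft_reset(struct radeon_device *rdev, u32 reg, u32 bits)
{
	u32 tmp = RREG32(reg);

	tmp |= bits;		/* assert reset for the selected blocks */
	WREG32(reg, tmp);
	tmp = RREG32(reg);	/* read back to post the write */

	udelay(50);		/* let the reset take effect */

	tmp &= ~bits;		/* de-assert */
	WREG32(reg, tmp);
	tmp = RREG32(reg);	/* post the de-assert as well */
}
#endif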
5105 
5106 struct kv_reset_save_regs {
5107 	u32 gmcon_reng_execute;
5108 	u32 gmcon_misc;
5109 	u32 gmcon_misc3;
5110 };
5111 
5112 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5113 				   struct kv_reset_save_regs *save)
5114 {
5115 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5116 	save->gmcon_misc = RREG32(GMCON_MISC);
5117 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5118 
5119 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5120 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5121 						STCTRL_STUTTER_EN));
5122 }
5123 
5124 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5125 				      struct kv_reset_save_regs *save)
5126 {
5127 	int i;
5128 
5129 	WREG32(GMCON_PGFSM_WRITE, 0);
5130 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5131 
5132 	for (i = 0; i < 5; i++)
5133 		WREG32(GMCON_PGFSM_WRITE, 0);
5134 
5135 	WREG32(GMCON_PGFSM_WRITE, 0);
5136 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5137 
5138 	for (i = 0; i < 5; i++)
5139 		WREG32(GMCON_PGFSM_WRITE, 0);
5140 
5141 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5142 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5143 
5144 	for (i = 0; i < 5; i++)
5145 		WREG32(GMCON_PGFSM_WRITE, 0);
5146 
5147 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5148 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5149 
5150 	for (i = 0; i < 5; i++)
5151 		WREG32(GMCON_PGFSM_WRITE, 0);
5152 
5153 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5154 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5155 
5156 	for (i = 0; i < 5; i++)
5157 		WREG32(GMCON_PGFSM_WRITE, 0);
5158 
5159 	WREG32(GMCON_PGFSM_WRITE, 0);
5160 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5161 
5162 	for (i = 0; i < 5; i++)
5163 		WREG32(GMCON_PGFSM_WRITE, 0);
5164 
5165 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5166 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5167 
5168 	for (i = 0; i < 5; i++)
5169 		WREG32(GMCON_PGFSM_WRITE, 0);
5170 
5171 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5172 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5173 
5174 	for (i = 0; i < 5; i++)
5175 		WREG32(GMCON_PGFSM_WRITE, 0);
5176 
5177 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5178 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5179 
5180 	for (i = 0; i < 5; i++)
5181 		WREG32(GMCON_PGFSM_WRITE, 0);
5182 
5183 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5184 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5185 
5186 	for (i = 0; i < 5; i++)
5187 		WREG32(GMCON_PGFSM_WRITE, 0);
5188 
5189 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5190 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5191 
5192 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5193 	WREG32(GMCON_MISC, save->gmcon_misc);
5194 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5195 }
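/*
 * Most of the restore sequence above is one repeated PGFSM command
 * pattern: write a data word, strobe the target FSM via
 * GMCON_PGFSM_CONFIG, then issue five dummy writes. As a sketch
 * (hypothetical helper, compiled out):
 */
#if 0
static void kv_pgfsm_cmd(struct radeon_device *rdev, u32 value, u32 config)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, value);	/* data for the power-gating FSM */
	WREG32(GMCON_PGFSM_CONFIG, config);	/* select/strobe the target FSM */
	for (i = 0; i < 5; i++)			/* dummy writes to latch the command */
		WREG32(GMCON_PGFSM_WRITE, 0);
}
#endif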
5196 
5197 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5198 {
5199 	struct evergreen_mc_save save;
5200 	struct kv_reset_save_regs kv_save = { 0 };
5201 	u32 tmp, i;
5202 
5203 	dev_info(rdev->dev, "GPU pci config reset\n");
5204 
5205 	/* disable dpm? */
5206 
5207 	/* disable cg/pg */
5208 	cik_fini_pg(rdev);
5209 	cik_fini_cg(rdev);
5210 
5211 	/* Disable GFX parsing/prefetching */
5212 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5213 
5214 	/* Disable MEC parsing/prefetching */
5215 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5216 
5217 	/* sdma0 */
5218 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5219 	tmp |= SDMA_HALT;
5220 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5221 	/* sdma1 */
5222 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5223 	tmp |= SDMA_HALT;
5224 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5225 	/* XXX other engines? */
5226 
5227 	/* halt the rlc, disable cp internal ints */
5228 	cik_rlc_stop(rdev);
5229 
5230 	udelay(50);
5231 
5232 	/* disable mem access */
5233 	evergreen_mc_stop(rdev, &save);
5234 	if (evergreen_mc_wait_for_idle(rdev)) {
5235 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5236 	}
5237 
5238 	if (rdev->flags & RADEON_IS_IGP)
5239 		kv_save_regs_for_reset(rdev, &kv_save);
5240 
5241 	/* disable BM */
5242 	pci_clear_master(rdev->pdev);
5243 	/* reset */
5244 	radeon_pci_config_reset(rdev);
5245 
5246 	udelay(100);
5247 
5248 	/* wait for asic to come out of reset */
5249 	for (i = 0; i < rdev->usec_timeout; i++) {
5250 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5251 			break;
5252 		udelay(1);
5253 	}
5254 
5255 	/* does asic init need to be run first??? */
5256 	if (rdev->flags & RADEON_IS_IGP)
5257 		kv_restore_regs_for_reset(rdev, &kv_save);
5258 }
5259 
5260 /**
5261  * cik_asic_reset - soft reset GPU
5262  *
5263  * @rdev: radeon_device pointer
5264  * @hard: force hard reset
5265  *
5266  * Look up which blocks are hung and attempt
5267  * to reset them.
5268  * Returns 0 for success.
5269  */
5270 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5271 {
5272 	u32 reset_mask;
5273 
5274 	if (hard) {
5275 		cik_gpu_pci_config_reset(rdev);
5276 		return 0;
5277 	}
5278 
5279 	reset_mask = cik_gpu_check_soft_reset(rdev);
5280 
5281 	if (reset_mask)
5282 		r600_set_bios_scratch_engine_hung(rdev, true);
5283 
5284 	/* try soft reset */
5285 	cik_gpu_soft_reset(rdev, reset_mask);
5286 
5287 	reset_mask = cik_gpu_check_soft_reset(rdev);
5288 
5289 	/* try pci config reset */
5290 	if (reset_mask && radeon_hard_reset)
5291 		cik_gpu_pci_config_reset(rdev);
5292 
5293 	reset_mask = cik_gpu_check_soft_reset(rdev);
5294 
5295 	if (!reset_mask)
5296 		r600_set_bios_scratch_engine_hung(rdev, false);
5297 
5298 	return 0;
5299 }
5300 
5301 /**
5302  * cik_gfx_is_lockup - check if the 3D engine is locked up
5303  *
5304  * @rdev: radeon_device pointer
5305  * @ring: radeon_ring structure holding ring information
5306  *
5307  * Check if the 3D engine is locked up (CIK).
5308  * Returns true if the engine is locked, false if not.
5309  */
5310 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5311 {
5312 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5313 
5314 	if (!(reset_mask & (RADEON_RESET_GFX |
5315 			    RADEON_RESET_COMPUTE |
5316 			    RADEON_RESET_CP))) {
5317 		radeon_ring_lockup_update(rdev, ring);
5318 		return false;
5319 	}
5320 	return radeon_ring_test_lockup(rdev, ring);
5321 }
5322 
5323 /* MC */
5324 /**
5325  * cik_mc_program - program the GPU memory controller
5326  *
5327  * @rdev: radeon_device pointer
5328  *
5329  * Set the location of vram, gart, and AGP in the GPU's
5330  * physical address space (CIK).
5331  */
5332 static void cik_mc_program(struct radeon_device *rdev)
5333 {
5334 	struct evergreen_mc_save save;
5335 	u32 tmp;
5336 	int i, j;
5337 
5338 	/* Initialize HDP */
5339 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5340 		WREG32((0x2c14 + j), 0x00000000);
5341 		WREG32((0x2c18 + j), 0x00000000);
5342 		WREG32((0x2c1c + j), 0x00000000);
5343 		WREG32((0x2c20 + j), 0x00000000);
5344 		WREG32((0x2c24 + j), 0x00000000);
5345 	}
5346 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5347 
5348 	evergreen_mc_stop(rdev, &save);
5349 	if (radeon_mc_wait_for_idle(rdev)) {
5350 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5351 	}
5352 	/* Lockout access through VGA aperture*/
5353 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5354 	/* Update configuration */
5355 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5356 	       rdev->mc.vram_start >> 12);
5357 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5358 	       rdev->mc.vram_end >> 12);
5359 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5360 	       rdev->vram_scratch.gpu_addr >> 12);
5361 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5362 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5363 	WREG32(MC_VM_FB_LOCATION, tmp);
5364 	/* XXX double check these! */
5365 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5366 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5367 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5368 	WREG32(MC_VM_AGP_BASE, 0);
5369 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5370 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5371 	if (radeon_mc_wait_for_idle(rdev)) {
5372 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5373 	}
5374 	evergreen_mc_resume(rdev, &save);
5375 	/* we need to own VRAM, so turn off the VGA renderer here
5376 	 * to stop it from overwriting our objects */
5377 	rv515_vga_render_disable(rdev);
5378 }
5379 
5380 /**
5381  * cik_mc_init - initialize the memory controller driver params
5382  *
5383  * @rdev: radeon_device pointer
5384  *
5385  * Look up the amount of vram, vram width, and decide how to place
5386  * vram and gart within the GPU's physical address space (CIK).
5387  * Returns 0 for success.
5388  */
5389 static int cik_mc_init(struct radeon_device *rdev)
5390 {
5391 	u32 tmp;
5392 	int chansize, numchan;
5393 
5394 	/* Get VRAM information */
5395 	rdev->mc.vram_is_ddr = true;
5396 	tmp = RREG32(MC_ARB_RAMCFG);
5397 	if (tmp & CHANSIZE_MASK) {
5398 		chansize = 64;
5399 	} else {
5400 		chansize = 32;
5401 	}
5402 	tmp = RREG32(MC_SHARED_CHMAP);
5403 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5404 	case 0:
5405 	default:
5406 		numchan = 1;
5407 		break;
5408 	case 1:
5409 		numchan = 2;
5410 		break;
5411 	case 2:
5412 		numchan = 4;
5413 		break;
5414 	case 3:
5415 		numchan = 8;
5416 		break;
5417 	case 4:
5418 		numchan = 3;
5419 		break;
5420 	case 5:
5421 		numchan = 6;
5422 		break;
5423 	case 6:
5424 		numchan = 10;
5425 		break;
5426 	case 7:
5427 		numchan = 12;
5428 		break;
5429 	case 8:
5430 		numchan = 16;
5431 		break;
5432 	}
5433 	rdev->mc.vram_width = numchan * chansize;
5434 	/* Could aper size report 0? */
5435 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5436 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5437 	/* CONFIG_MEMSIZE reports size in MB, as on SI */
5438 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5439 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5440 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5441 	si_vram_gtt_location(rdev, &rdev->mc);
5442 	radeon_update_bandwidth_info(rdev);
5443 
5444 	return 0;
5445 }
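/*
 * The NOOFCHAN decode in cik_mc_init() is equivalent to a lookup table
 * (sketch, compiled out):
 */
#if 0
static const int cik_numchan[] = { 1, 2, 4, 8, 3, 6, 10, 12, 16 };
/* numchan = cik_numchan[(tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT];
 * field values outside 0-8 fall back to a single channel. */
#endif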
5446 
5447 /*
5448  * GART
5449  * VMID 0 is the physical GPU addresses as used by the kernel.
5450  * VMIDs 1-15 are used for userspace clients and are handled
5451  * by the radeon vm/hsa code.
5452  */
5453 /**
5454  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5455  *
5456  * @rdev: radeon_device pointer
5457  *
5458  * Flush the TLB for the VMID 0 page table (CIK).
5459  */
5460 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5461 {
5462 	/* flush hdp cache */
5463 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5464 
5465 	/* bits 0-15 are the VM contexts 0-15 */
5466 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5467 }
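/*
 * VM_INVALIDATE_REQUEST takes a bitmask with one bit per VM context,
 * so the 0x1 above flushes only VMID 0. Flushing a single user VMID
 * would look like:
 *
 *	WREG32(VM_INVALIDATE_REQUEST, 1 << vm_id);
 *
 * which matches the "1 << vm_id" emitted from the ring in
 * cik_vm_flush() below.
 */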
5468 
5469 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5470 {
5471 	int i;
5472 	uint32_t sh_mem_bases, sh_mem_config;
5473 
5474 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5475 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5476 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5477 
5478 	mutex_lock(&rdev->srbm_mutex);
5479 	for (i = 8; i < 16; i++) {
5480 		cik_srbm_select(rdev, 0, 0, 0, i);
5481 		/* CP and shaders */
5482 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5483 		WREG32(SH_MEM_APE1_BASE, 1);
5484 		WREG32(SH_MEM_APE1_LIMIT, 0);
5485 		WREG32(SH_MEM_BASES, sh_mem_bases);
5486 	}
5487 	cik_srbm_select(rdev, 0, 0, 0, 0);
5488 	mutex_unlock(&rdev->srbm_mutex);
5489 }
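/*
 * SH_MEM_BASES packs two 16-bit aperture bases: the private base in
 * the low half and the shared base in the high half, so the
 * 0x6000 | 0x6000 << 16 above places both apertures at the same base
 * (presumably the upper bits of a 64-bit GPUVM address, i.e. around
 * 0x6000_0000_0000; the exact decoding is an assumption, it is not
 * spelled out in this file).
 */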
5490 
5491 /**
5492  * cik_pcie_gart_enable - gart enable
5493  *
5494  * @rdev: radeon_device pointer
5495  *
5496  * This sets up the TLBs, programs the page tables for VMID0,
5497  * sets up the hw for VMIDs 1-15 which are allocated on
5498  * demand, and sets up the global locations for the LDS, GDS,
5499  * and GPUVM for FSA64 clients (CIK).
5500  * Returns 0 for success, errors for failure.
5501  */
5502 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5503 {
5504 	int r, i;
5505 
5506 	if (rdev->gart.robj == NULL) {
5507 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5508 		return -EINVAL;
5509 	}
5510 	r = radeon_gart_table_vram_pin(rdev);
5511 	if (r)
5512 		return r;
5513 	/* Setup TLB control */
5514 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5515 	       (0xA << 7) |
5516 	       ENABLE_L1_TLB |
5517 	       ENABLE_L1_FRAGMENT_PROCESSING |
5518 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5519 	       ENABLE_ADVANCED_DRIVER_MODEL |
5520 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5521 	/* Setup L2 cache */
5522 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5523 	       ENABLE_L2_FRAGMENT_PROCESSING |
5524 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5525 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5526 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5527 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5528 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5529 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5530 	       BANK_SELECT(4) |
5531 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5532 	/* setup context0 */
5533 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5534 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5535 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5536 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5537 			(u32)(rdev->dummy_page.addr >> 12));
5538 	WREG32(VM_CONTEXT0_CNTL2, 0);
5539 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5540 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5541 
5542 	WREG32(0x15D4, 0);
5543 	WREG32(0x15D8, 0);
5544 	WREG32(0x15DC, 0);
5545 
5546 	/* restore context1-15 */
5547 	/* set vm size, must be a multiple of 4 */
5548 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5549 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5550 	for (i = 1; i < 16; i++) {
5551 		if (i < 8)
5552 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5553 			       rdev->vm_manager.saved_table_addr[i]);
5554 		else
5555 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5556 			       rdev->vm_manager.saved_table_addr[i]);
5557 	}
5558 
5559 	/* enable context1-15 */
5560 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5561 	       (u32)(rdev->dummy_page.addr >> 12));
5562 	WREG32(VM_CONTEXT1_CNTL2, 4);
5563 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5564 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5565 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5566 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5567 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5568 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5569 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5570 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5571 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5572 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5573 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5574 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5575 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5576 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5577 
5578 	if (rdev->family == CHIP_KAVERI) {
5579 		u32 tmp = RREG32(CHUB_CONTROL);
5580 		tmp &= ~BYPASS_VM;
5581 		WREG32(CHUB_CONTROL, tmp);
5582 	}
5583 
5584 	/* XXX SH_MEM regs */
5585 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5586 	mutex_lock(&rdev->srbm_mutex);
5587 	for (i = 0; i < 16; i++) {
5588 		cik_srbm_select(rdev, 0, 0, 0, i);
5589 		/* CP and shaders */
5590 		WREG32(SH_MEM_CONFIG, 0);
5591 		WREG32(SH_MEM_APE1_BASE, 1);
5592 		WREG32(SH_MEM_APE1_LIMIT, 0);
5593 		WREG32(SH_MEM_BASES, 0);
5594 		/* SDMA GFX */
5595 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5596 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5597 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5598 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5599 		/* XXX SDMA RLC - todo */
5600 	}
5601 	cik_srbm_select(rdev, 0, 0, 0, 0);
5602 	mutex_unlock(&rdev->srbm_mutex);
5603 
5604 	cik_pcie_init_compute_vmid(rdev);
5605 
5606 	cik_pcie_gart_tlb_flush(rdev);
5607 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5608 		 (unsigned)(rdev->mc.gtt_size >> 20),
5609 		 (unsigned long long)rdev->gart.table_addr);
5610 	rdev->gart.ready = true;
5611 	return 0;
5612 }
5613 
5614 /**
5615  * cik_pcie_gart_disable - gart disable
5616  *
5617  * @rdev: radeon_device pointer
5618  *
5619  * This disables all VM page tables (CIK).
5620  */
5621 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5622 {
5623 	unsigned i;
5624 
5625 	for (i = 1; i < 16; ++i) {
5626 		uint32_t reg;
5627 		if (i < 8)
5628 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5629 		else
5630 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5631 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5632 	}
5633 
5634 	/* Disable all tables */
5635 	WREG32(VM_CONTEXT0_CNTL, 0);
5636 	WREG32(VM_CONTEXT1_CNTL, 0);
5637 	/* Setup TLB control */
5638 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5639 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5640 	/* Setup L2 cache */
5641 	WREG32(VM_L2_CNTL,
5642 	       ENABLE_L2_FRAGMENT_PROCESSING |
5643 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5644 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5645 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5646 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5647 	WREG32(VM_L2_CNTL2, 0);
5648 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5649 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5650 	radeon_gart_table_vram_unpin(rdev);
5651 }
5652 
5653 /**
5654  * cik_pcie_gart_fini - vm fini callback
5655  *
5656  * @rdev: radeon_device pointer
5657  *
5658  * Tears down the driver GART/VM setup (CIK).
5659  */
5660 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5661 {
5662 	cik_pcie_gart_disable(rdev);
5663 	radeon_gart_table_vram_free(rdev);
5664 	radeon_gart_fini(rdev);
5665 }
5666 
5667 /* vm parser */
5668 /**
5669  * cik_ib_parse - vm ib_parse callback
5670  *
5671  * @rdev: radeon_device pointer
5672  * @ib: indirect buffer pointer
5673  *
5674  * CIK uses hw IB checking so this is a nop (CIK).
5675  */
5676 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5677 {
5678 	return 0;
5679 }
5680 
5681 /*
5682  * vm
5683  * VMID 0 is the physical GPU addresses as used by the kernel.
5684  * VMIDs 1-15 are used for userspace clients and are handled
5685  * by the radeon vm/hsa code.
5686  */
5687 /**
5688  * cik_vm_init - cik vm init callback
5689  *
5690  * @rdev: radeon_device pointer
5691  *
5692  * Inits CIK-specific VM parameters (number of VMs, base of VRAM for
5693  * VMIDs 1-15) (CIK).
5694  * Returns 0 for success.
5695  */
5696 int cik_vm_init(struct radeon_device *rdev)
5697 {
5698 	/*
5699 	 * number of VMs
5700 	 * VMID 0 is reserved for System
5701 	 * radeon graphics/compute will use VMIDs 1-7
5702 	 * amdkfd will use VMIDs 8-15
5703 	 */
5704 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5705 	/* base offset of vram pages */
5706 	if (rdev->flags & RADEON_IS_IGP) {
5707 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5708 		tmp <<= 22;
5709 		rdev->vm_manager.vram_base_offset = tmp;
5710 	} else
5711 		rdev->vm_manager.vram_base_offset = 0;
5712 
5713 	return 0;
5714 }
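/*
 * The << 22 above implies MC_VM_FB_OFFSET is kept in 4 MB units on
 * IGPs: e.g. a register value of 0x100 would decode to a 0x40000000
 * (1 GB) byte offset (illustrative value).
 */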
5715 
5716 /**
5717  * cik_vm_fini - cik vm fini callback
5718  *
5719  * @rdev: radeon_device pointer
5720  *
5721  * Tear down any asic specific VM setup (CIK).
5722  */
5723 void cik_vm_fini(struct radeon_device *rdev)
5724 {
5725 }
5726 
5727 /**
5728  * cik_vm_decode_fault - print human readable fault info
5729  *
5730  * @rdev: radeon_device pointer
5731  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5732  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5733  *
5734  * Print human readable fault information (CIK).
5735  */
5736 static void cik_vm_decode_fault(struct radeon_device *rdev,
5737 				u32 status, u32 addr, u32 mc_client)
5738 {
5739 	u32 mc_id;
5740 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5741 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5742 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5743 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5744 
5745 	if (rdev->family == CHIP_HAWAII)
5746 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5747 	else
5748 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5749 
5750 	printk(KERN_ERR "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5751 	       protections, vmid, addr,
5752 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5753 	       block, mc_client, mc_id);
5754 }
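/*
 * mc_client is a packed four-character client tag; the byte unpacking
 * above would turn an illustrative value of 0x43423000 into the string
 * "CB0" (0x43 'C', 0x42 'B', 0x30 '0', NUL).
 */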
5755 
5756 /**
5757  * cik_vm_flush - cik vm flush using the CP
5758  *
5759  * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer to the ring used for the flush
 * @vm_id: VM ID whose page table base is updated and whose TLB is flushed
 * @pd_addr: new page directory base address
5760  *
5761  * Update the page table base and flush the VM TLB
5762  * using the CP (CIK).
5763  */
5764 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5765 		  unsigned vm_id, uint64_t pd_addr)
5766 {
5767 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5768 
5769 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5770 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5771 				 WRITE_DATA_DST_SEL(0)));
5772 	if (vm_id < 8) {
5773 		radeon_ring_write(ring,
5774 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5775 	} else {
5776 		radeon_ring_write(ring,
5777 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5778 	}
5779 	radeon_ring_write(ring, 0);
5780 	radeon_ring_write(ring, pd_addr >> 12);
5781 
5782 	/* update SH_MEM_* regs */
5783 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5784 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5785 				 WRITE_DATA_DST_SEL(0)));
5786 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5787 	radeon_ring_write(ring, 0);
5788 	radeon_ring_write(ring, VMID(vm_id));
5789 
5790 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5791 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5792 				 WRITE_DATA_DST_SEL(0)));
5793 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5794 	radeon_ring_write(ring, 0);
5795 
5796 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5797 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5798 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5799 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5800 
5801 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5802 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5803 				 WRITE_DATA_DST_SEL(0)));
5804 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5805 	radeon_ring_write(ring, 0);
5806 	radeon_ring_write(ring, VMID(0));
5807 
5808 	/* HDP flush */
5809 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5810 
5811 	/* bits 0-15 are the VM contexts 0-15 */
5812 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5813 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5814 				 WRITE_DATA_DST_SEL(0)));
5815 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5816 	radeon_ring_write(ring, 0);
5817 	radeon_ring_write(ring, 1 << vm_id);
5818 
5819 	/* wait for the invalidate to complete */
5820 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5821 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5822 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5823 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5824 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5825 	radeon_ring_write(ring, 0);
5826 	radeon_ring_write(ring, 0); /* ref */
5827 	radeon_ring_write(ring, 0); /* mask */
5828 	radeon_ring_write(ring, 0x20); /* poll interval */
5829 
5830 	/* compute doesn't have PFP */
5831 	if (usepfp) {
5832 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5833 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5834 		radeon_ring_write(ring, 0x0);
5835 	}
5836 }
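/*
 * Note on the WAIT_REG_MEM packet above: with function "always" and a
 * zero mask it never compares anything; it presumably serves as an
 * ordering point (poll interval 0x20), forcing a read of
 * VM_INVALIDATE_REQUEST so the invalidate write has landed before
 * later packets execute.
 */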
5837 
5838 /*
5839  * RLC
5840  * The RLC is a multi-purpose microengine that handles a
5841  * variety of functions, the most important of which is
5842  * the interrupt controller.
5843  */
5844 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5845 					  bool enable)
5846 {
5847 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5848 
5849 	if (enable)
5850 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5851 	else
5852 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5853 	WREG32(CP_INT_CNTL_RING0, tmp);
5854 }
5855 
5856 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5857 {
5858 	u32 tmp;
5859 
5860 	tmp = RREG32(RLC_LB_CNTL);
5861 	if (enable)
5862 		tmp |= LOAD_BALANCE_ENABLE;
5863 	else
5864 		tmp &= ~LOAD_BALANCE_ENABLE;
5865 	WREG32(RLC_LB_CNTL, tmp);
5866 }
5867 
5868 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5869 {
5870 	u32 i, j, k;
5871 	u32 mask;
5872 
5873 	mutex_lock(&rdev->grbm_idx_mutex);
5874 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5875 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5876 			cik_select_se_sh(rdev, i, j);
5877 			for (k = 0; k < rdev->usec_timeout; k++) {
5878 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5879 					break;
5880 				udelay(1);
5881 			}
5882 		}
5883 	}
5884 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5885 	mutex_unlock(&rdev->grbm_idx_mutex);
5886 
5887 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5888 	for (k = 0; k < rdev->usec_timeout; k++) {
5889 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5890 			break;
5891 		udelay(1);
5892 	}
5893 }
5894 
5895 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5896 {
5897 	u32 tmp;
5898 
5899 	tmp = RREG32(RLC_CNTL);
5900 	if (tmp != rlc)
5901 		WREG32(RLC_CNTL, rlc);
5902 }
5903 
5904 static u32 cik_halt_rlc(struct radeon_device *rdev)
5905 {
5906 	u32 data, orig;
5907 
5908 	orig = data = RREG32(RLC_CNTL);
5909 
5910 	if (data & RLC_ENABLE) {
5911 		u32 i;
5912 
5913 		data &= ~RLC_ENABLE;
5914 		WREG32(RLC_CNTL, data);
5915 
5916 		for (i = 0; i < rdev->usec_timeout; i++) {
5917 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5918 				break;
5919 			udelay(1);
5920 		}
5921 
5922 		cik_wait_for_rlc_serdes(rdev);
5923 	}
5924 
5925 	return orig;
5926 }
5927 
5928 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5929 {
5930 	u32 tmp, i, mask;
5931 
5932 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5933 	WREG32(RLC_GPR_REG2, tmp);
5934 
5935 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5936 	for (i = 0; i < rdev->usec_timeout; i++) {
5937 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5938 			break;
5939 		udelay(1);
5940 	}
5941 
5942 	for (i = 0; i < rdev->usec_timeout; i++) {
5943 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5944 			break;
5945 		udelay(1);
5946 	}
5947 }
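/*
 * Safe-mode entry is thus a two-step handshake: wait for the RLC to
 * report GFX power and clock status, then wait for it to clear the
 * REQ bit as the acknowledgement. The exit path below only posts the
 * request and does not wait.
 */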
5948 
5949 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5950 {
5951 	u32 tmp;
5952 
5953 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5954 	WREG32(RLC_GPR_REG2, tmp);
5955 }
5956 
5957 /**
5958  * cik_rlc_stop - stop the RLC ME
5959  *
5960  * @rdev: radeon_device pointer
5961  *
5962  * Halt the RLC ME (MicroEngine) (CIK).
5963  */
5964 static void cik_rlc_stop(struct radeon_device *rdev)
5965 {
5966 	WREG32(RLC_CNTL, 0);
5967 
5968 	cik_enable_gui_idle_interrupt(rdev, false);
5969 
5970 	cik_wait_for_rlc_serdes(rdev);
5971 }
5972 
5973 /**
5974  * cik_rlc_start - start the RLC ME
5975  *
5976  * @rdev: radeon_device pointer
5977  *
5978  * Unhalt the RLC ME (MicroEngine) (CIK).
5979  */
5980 static void cik_rlc_start(struct radeon_device *rdev)
5981 {
5982 	WREG32(RLC_CNTL, RLC_ENABLE);
5983 
5984 	cik_enable_gui_idle_interrupt(rdev, true);
5985 
5986 	udelay(50);
5987 }
5988 
5989 /**
5990  * cik_rlc_resume - setup the RLC hw
5991  *
5992  * @rdev: radeon_device pointer
5993  *
5994  * Initialize the RLC registers, load the ucode,
5995  * and start the RLC (CIK).
5996  * Returns 0 for success, -EINVAL if the ucode is not available.
5997  */
5998 static int cik_rlc_resume(struct radeon_device *rdev)
5999 {
6000 	u32 i, size, tmp;
6001 
6002 	if (!rdev->rlc_fw)
6003 		return -EINVAL;
6004 
6005 	cik_rlc_stop(rdev);
6006 
6007 	/* disable CG */
6008 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6009 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6010 
6011 	si_rlc_reset(rdev);
6012 
6013 	cik_init_pg(rdev);
6014 
6015 	cik_init_cg(rdev);
6016 
6017 	WREG32(RLC_LB_CNTR_INIT, 0);
6018 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6019 
6020 	mutex_lock(&rdev->grbm_idx_mutex);
6021 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6022 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6023 	WREG32(RLC_LB_PARAMS, 0x00600408);
6024 	WREG32(RLC_LB_CNTL, 0x80000004);
6025 	mutex_unlock(&rdev->grbm_idx_mutex);
6026 
6027 	WREG32(RLC_MC_CNTL, 0);
6028 	WREG32(RLC_UCODE_CNTL, 0);
6029 
6030 	if (rdev->new_fw) {
6031 		const struct rlc_firmware_header_v1_0 *hdr =
6032 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6033 		const __le32 *fw_data = (const __le32 *)
6034 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6035 
6036 		radeon_ucode_print_rlc_hdr(&hdr->header);
6037 
6038 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6039 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6040 		for (i = 0; i < size; i++)
6041 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6042 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6043 	} else {
6044 		const __be32 *fw_data;
6045 
6046 		switch (rdev->family) {
6047 		case CHIP_BONAIRE:
6048 		case CHIP_HAWAII:
6049 		default:
6050 			size = BONAIRE_RLC_UCODE_SIZE;
6051 			break;
6052 		case CHIP_KAVERI:
6053 			size = KV_RLC_UCODE_SIZE;
6054 			break;
6055 		case CHIP_KABINI:
6056 			size = KB_RLC_UCODE_SIZE;
6057 			break;
6058 		case CHIP_MULLINS:
6059 			size = ML_RLC_UCODE_SIZE;
6060 			break;
6061 		}
6062 
6063 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6064 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6065 		for (i = 0; i < size; i++)
6066 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6067 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6068 	}
6069 
6070 	/* XXX - find out what chips support lbpw */
6071 	cik_enable_lbpw(rdev, false);
6072 
6073 	if (rdev->family == CHIP_BONAIRE)
6074 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6075 
6076 	cik_rlc_start(rdev);
6077 
6078 	return 0;
6079 }
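/*
 * Ucode upload pattern used above: write 0 to RLC_GPM_UCODE_ADDR to
 * reset the write pointer, stream the words through RLC_GPM_UCODE_DATA
 * (the address presumably auto-increments), then write the final ADDR
 * value: the ucode version for new-style firmware, 0 for legacy
 * firmware.
 */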
6080 
6081 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6082 {
6083 	u32 data, orig, tmp, tmp2;
6084 
6085 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6086 
6087 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6088 		cik_enable_gui_idle_interrupt(rdev, true);
6089 
6090 		tmp = cik_halt_rlc(rdev);
6091 
6092 		mutex_lock(&rdev->grbm_idx_mutex);
6093 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6094 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6095 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6096 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6097 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6098 		mutex_unlock(&rdev->grbm_idx_mutex);
6099 
6100 		cik_update_rlc(rdev, tmp);
6101 
6102 		data |= CGCG_EN | CGLS_EN;
6103 	} else {
6104 		cik_enable_gui_idle_interrupt(rdev, false);
6105 
6106 		RREG32(CB_CGTT_SCLK_CTRL);
6107 		RREG32(CB_CGTT_SCLK_CTRL);
6108 		RREG32(CB_CGTT_SCLK_CTRL);
6109 		RREG32(CB_CGTT_SCLK_CTRL);
6110 
6111 		data &= ~(CGCG_EN | CGLS_EN);
6112 	}
6113 
6114 	if (orig != data)
6115 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6116 
6117 }
6118 
6119 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6120 {
6121 	u32 data, orig, tmp = 0;
6122 
6123 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6124 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6125 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6126 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6127 				data |= CP_MEM_LS_EN;
6128 				if (orig != data)
6129 					WREG32(CP_MEM_SLP_CNTL, data);
6130 			}
6131 		}
6132 
6133 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6134 		data |= 0x00000001;
6135 		data &= 0xfffffffd;
6136 		if (orig != data)
6137 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6138 
6139 		tmp = cik_halt_rlc(rdev);
6140 
6141 		mutex_lock(&rdev->grbm_idx_mutex);
6142 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6143 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6144 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6145 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6146 		WREG32(RLC_SERDES_WR_CTRL, data);
6147 		mutex_unlock(&rdev->grbm_idx_mutex);
6148 
6149 		cik_update_rlc(rdev, tmp);
6150 
6151 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6152 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6153 			data &= ~SM_MODE_MASK;
6154 			data |= SM_MODE(0x2);
6155 			data |= SM_MODE_ENABLE;
6156 			data &= ~CGTS_OVERRIDE;
6157 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6158 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6159 				data &= ~CGTS_LS_OVERRIDE;
6160 			data &= ~ON_MONITOR_ADD_MASK;
6161 			data |= ON_MONITOR_ADD_EN;
6162 			data |= ON_MONITOR_ADD(0x96);
6163 			if (orig != data)
6164 				WREG32(CGTS_SM_CTRL_REG, data);
6165 		}
6166 	} else {
6167 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6168 		data |= 0x00000003;
6169 		if (orig != data)
6170 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6171 
6172 		data = RREG32(RLC_MEM_SLP_CNTL);
6173 		if (data & RLC_MEM_LS_EN) {
6174 			data &= ~RLC_MEM_LS_EN;
6175 			WREG32(RLC_MEM_SLP_CNTL, data);
6176 		}
6177 
6178 		data = RREG32(CP_MEM_SLP_CNTL);
6179 		if (data & CP_MEM_LS_EN) {
6180 			data &= ~CP_MEM_LS_EN;
6181 			WREG32(CP_MEM_SLP_CNTL, data);
6182 		}
6183 
6184 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6185 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6186 		if (orig != data)
6187 			WREG32(CGTS_SM_CTRL_REG, data);
6188 
6189 		tmp = cik_halt_rlc(rdev);
6190 
6191 		mutex_lock(&rdev->grbm_idx_mutex);
6192 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6193 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6194 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6195 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6196 		WREG32(RLC_SERDES_WR_CTRL, data);
6197 		mutex_unlock(&rdev->grbm_idx_mutex);
6198 
6199 		cik_update_rlc(rdev, tmp);
6200 	}
6201 }
6202 
6203 static const u32 mc_cg_registers[] =
6204 {
6205 	MC_HUB_MISC_HUB_CG,
6206 	MC_HUB_MISC_SIP_CG,
6207 	MC_HUB_MISC_VM_CG,
6208 	MC_XPB_CLK_GAT,
6209 	ATC_MISC_CG,
6210 	MC_CITF_MISC_WR_CG,
6211 	MC_CITF_MISC_RD_CG,
6212 	MC_CITF_MISC_VM_CG,
6213 	VM_L2_CG,
6214 };
6215 
6216 static void cik_enable_mc_ls(struct radeon_device *rdev,
6217 			     bool enable)
6218 {
6219 	int i;
6220 	u32 orig, data;
6221 
6222 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6223 		orig = data = RREG32(mc_cg_registers[i]);
6224 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6225 			data |= MC_LS_ENABLE;
6226 		else
6227 			data &= ~MC_LS_ENABLE;
6228 		if (data != orig)
6229 			WREG32(mc_cg_registers[i], data);
6230 	}
6231 }
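/*
 * cik_enable_mc_ls() and the surrounding clock-gating helpers all
 * share a read-modify-write-if-changed idiom. A generic sketch
 * (hypothetical helper, compiled out):
 */
#if 0
static void cik_rmw_if_changed(struct radeon_device *rdev, u32 reg,
			       u32 mask, bool set)
{
	u32 orig, data;

	orig = data = RREG32(reg);
	if (set)
		data |= mask;	/* enable the feature bits */
	else
		data &= ~mask;	/* disable them */
	if (data != orig)	/* skip redundant register writes */
		WREG32(reg, data);
}
#endif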
6232 
6233 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6234 			       bool enable)
6235 {
6236 	int i;
6237 	u32 orig, data;
6238 
6239 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6240 		orig = data = RREG32(mc_cg_registers[i]);
6241 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6242 			data |= MC_CG_ENABLE;
6243 		else
6244 			data &= ~MC_CG_ENABLE;
6245 		if (data != orig)
6246 			WREG32(mc_cg_registers[i], data);
6247 	}
6248 }
6249 
6250 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6251 				 bool enable)
6252 {
6253 	u32 orig, data;
6254 
6255 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6256 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6257 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6258 	} else {
6259 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6260 		data |= 0xff000000;
6261 		if (data != orig)
6262 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6263 
6264 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6265 		data |= 0xff000000;
6266 		if (data != orig)
6267 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6268 	}
6269 }
6270 
6271 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6272 				 bool enable)
6273 {
6274 	u32 orig, data;
6275 
6276 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6277 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6278 		data |= 0x100;
6279 		if (orig != data)
6280 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6281 
6282 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6283 		data |= 0x100;
6284 		if (orig != data)
6285 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6286 	} else {
6287 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6288 		data &= ~0x100;
6289 		if (orig != data)
6290 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6291 
6292 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6293 		data &= ~0x100;
6294 		if (orig != data)
6295 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6296 	}
6297 }
6298 
6299 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6300 				bool enable)
6301 {
6302 	u32 orig, data;
6303 
6304 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6305 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6306 		data = 0xfff;
6307 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6308 
6309 		orig = data = RREG32(UVD_CGC_CTRL);
6310 		data |= DCM;
6311 		if (orig != data)
6312 			WREG32(UVD_CGC_CTRL, data);
6313 	} else {
6314 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6315 		data &= ~0xfff;
6316 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6317 
6318 		orig = data = RREG32(UVD_CGC_CTRL);
6319 		data &= ~DCM;
6320 		if (orig != data)
6321 			WREG32(UVD_CGC_CTRL, data);
6322 	}
6323 }
6324 
6325 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6326 			       bool enable)
6327 {
6328 	u32 orig, data;
6329 
6330 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6331 
6332 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6333 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6334 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6335 	else
6336 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6337 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6338 
6339 	if (orig != data)
6340 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6341 }
6342 
6343 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6344 				bool enable)
6345 {
6346 	u32 orig, data;
6347 
6348 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6349 
6350 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6351 		data &= ~CLOCK_GATING_DIS;
6352 	else
6353 		data |= CLOCK_GATING_DIS;
6354 
6355 	if (orig != data)
6356 		WREG32(HDP_HOST_PATH_CNTL, data);
6357 }
6358 
6359 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6360 			      bool enable)
6361 {
6362 	u32 orig, data;
6363 
6364 	orig = data = RREG32(HDP_MEM_POWER_LS);
6365 
6366 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6367 		data |= HDP_LS_ENABLE;
6368 	else
6369 		data &= ~HDP_LS_ENABLE;
6370 
6371 	if (orig != data)
6372 		WREG32(HDP_MEM_POWER_LS, data);
6373 }
6374 
6375 void cik_update_cg(struct radeon_device *rdev,
6376 		   u32 block, bool enable)
6377 {
6378 
6379 	if (block & RADEON_CG_BLOCK_GFX) {
6380 		cik_enable_gui_idle_interrupt(rdev, false);
6381 		/* order matters! */
6382 		if (enable) {
6383 			cik_enable_mgcg(rdev, true);
6384 			cik_enable_cgcg(rdev, true);
6385 		} else {
6386 			cik_enable_cgcg(rdev, false);
6387 			cik_enable_mgcg(rdev, false);
6388 		}
6389 		cik_enable_gui_idle_interrupt(rdev, true);
6390 	}
6391 
6392 	if (block & RADEON_CG_BLOCK_MC) {
6393 		if (!(rdev->flags & RADEON_IS_IGP)) {
6394 			cik_enable_mc_mgcg(rdev, enable);
6395 			cik_enable_mc_ls(rdev, enable);
6396 		}
6397 	}
6398 
6399 	if (block & RADEON_CG_BLOCK_SDMA) {
6400 		cik_enable_sdma_mgcg(rdev, enable);
6401 		cik_enable_sdma_mgls(rdev, enable);
6402 	}
6403 
6404 	if (block & RADEON_CG_BLOCK_BIF) {
6405 		cik_enable_bif_mgls(rdev, enable);
6406 	}
6407 
6408 	if (block & RADEON_CG_BLOCK_UVD) {
6409 		if (rdev->has_uvd)
6410 			cik_enable_uvd_mgcg(rdev, enable);
6411 	}
6412 
6413 	if (block & RADEON_CG_BLOCK_HDP) {
6414 		cik_enable_hdp_mgcg(rdev, enable);
6415 		cik_enable_hdp_ls(rdev, enable);
6416 	}
6417 
6418 	if (block & RADEON_CG_BLOCK_VCE) {
6419 		vce_v2_0_enable_mgcg(rdev, enable);
6420 	}
6421 }
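/*
 * Usage note: cik_init_cg()/cik_fini_cg() below drive this with the
 * GFX block handled separately, since MGCG must be enabled before CGCG
 * and disabled after it (the "order matters" comment above).
 */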
6422 
6423 static void cik_init_cg(struct radeon_device *rdev)
6424 {
6425 
6426 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6427 
6428 	if (rdev->has_uvd)
6429 		si_init_uvd_internal_cg(rdev);
6430 
6431 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6432 			     RADEON_CG_BLOCK_SDMA |
6433 			     RADEON_CG_BLOCK_BIF |
6434 			     RADEON_CG_BLOCK_UVD |
6435 			     RADEON_CG_BLOCK_HDP), true);
6436 }
6437 
6438 static void cik_fini_cg(struct radeon_device *rdev)
6439 {
6440 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6441 			     RADEON_CG_BLOCK_SDMA |
6442 			     RADEON_CG_BLOCK_BIF |
6443 			     RADEON_CG_BLOCK_UVD |
6444 			     RADEON_CG_BLOCK_HDP), false);
6445 
6446 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6447 }
6448 
6449 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6450 					  bool enable)
6451 {
6452 	u32 data, orig;
6453 
6454 	orig = data = RREG32(RLC_PG_CNTL);
6455 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6456 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6457 	else
6458 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6459 	if (orig != data)
6460 		WREG32(RLC_PG_CNTL, data);
6461 }
6462 
6463 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6464 					  bool enable)
6465 {
6466 	u32 data, orig;
6467 
6468 	orig = data = RREG32(RLC_PG_CNTL);
6469 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6470 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6471 	else
6472 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6473 	if (orig != data)
6474 		WREG32(RLC_PG_CNTL, data);
6475 }
6476 
6477 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6478 {
6479 	u32 data, orig;
6480 
6481 	orig = data = RREG32(RLC_PG_CNTL);
6482 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6483 		data &= ~DISABLE_CP_PG;
6484 	else
6485 		data |= DISABLE_CP_PG;
6486 	if (orig != data)
6487 		WREG32(RLC_PG_CNTL, data);
6488 }
6489 
6490 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6491 {
6492 	u32 data, orig;
6493 
6494 	orig = data = RREG32(RLC_PG_CNTL);
6495 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6496 		data &= ~DISABLE_GDS_PG;
6497 	else
6498 		data |= DISABLE_GDS_PG;
6499 	if (orig != data)
6500 		WREG32(RLC_PG_CNTL, data);
6501 }
6502 
6503 #define CP_ME_TABLE_SIZE    96
6504 #define CP_ME_TABLE_OFFSET  2048
6505 #define CP_MEC_TABLE_OFFSET 4096
6506 
6507 void cik_init_cp_pg_table(struct radeon_device *rdev)
6508 {
6509 	volatile u32 *dst_ptr;
6510 	int me, i, max_me = 4;
6511 	u32 bo_offset = 0;
6512 	u32 table_offset, table_size;
6513 
6514 	if (rdev->family == CHIP_KAVERI)
6515 		max_me = 5;
6516 
6517 	if (rdev->rlc.cp_table_ptr == NULL)
6518 		return;
6519 
6520 	/* write the cp table buffer */
6521 	dst_ptr = rdev->rlc.cp_table_ptr;
6522 	for (me = 0; me < max_me; me++) {
6523 		if (rdev->new_fw) {
6524 			const __le32 *fw_data;
6525 			const struct gfx_firmware_header_v1_0 *hdr;
6526 
6527 			if (me == 0) {
6528 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6529 				fw_data = (const __le32 *)
6530 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6531 				table_offset = le32_to_cpu(hdr->jt_offset);
6532 				table_size = le32_to_cpu(hdr->jt_size);
6533 			} else if (me == 1) {
6534 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6535 				fw_data = (const __le32 *)
6536 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6537 				table_offset = le32_to_cpu(hdr->jt_offset);
6538 				table_size = le32_to_cpu(hdr->jt_size);
6539 			} else if (me == 2) {
6540 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6541 				fw_data = (const __le32 *)
6542 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6543 				table_offset = le32_to_cpu(hdr->jt_offset);
6544 				table_size = le32_to_cpu(hdr->jt_size);
6545 			} else if (me == 3) {
6546 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6547 				fw_data = (const __le32 *)
6548 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6549 				table_offset = le32_to_cpu(hdr->jt_offset);
6550 				table_size = le32_to_cpu(hdr->jt_size);
6551 			} else {
6552 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6553 				fw_data = (const __le32 *)
6554 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6555 				table_offset = le32_to_cpu(hdr->jt_offset);
6556 				table_size = le32_to_cpu(hdr->jt_size);
6557 			}
6558 
6559 			for (i = 0; i < table_size; i++) {
6560 				dst_ptr[bo_offset + i] =
6561 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6562 			}
6563 			bo_offset += table_size;
6564 		} else {
6565 			const __be32 *fw_data;
6566 			table_size = CP_ME_TABLE_SIZE;
6567 
6568 			if (me == 0) {
6569 				fw_data = (const __be32 *)rdev->ce_fw->data;
6570 				table_offset = CP_ME_TABLE_OFFSET;
6571 			} else if (me == 1) {
6572 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6573 				table_offset = CP_ME_TABLE_OFFSET;
6574 			} else if (me == 2) {
6575 				fw_data = (const __be32 *)rdev->me_fw->data;
6576 				table_offset = CP_ME_TABLE_OFFSET;
6577 			} else {
6578 				fw_data = (const __be32 *)rdev->mec_fw->data;
6579 				table_offset = CP_MEC_TABLE_OFFSET;
6580 			}
6581 
6582 			for (i = 0; i < table_size; i++) {
6583 				dst_ptr[bo_offset + i] =
6584 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6585 			}
6586 			bo_offset += table_size;
6587 		}
6588 	}
6589 }
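/*
 * The data copied here is each microengine's jump table: new-style
 * firmware describes it via jt_offset/jt_size in the gfx header, while
 * legacy firmware uses the fixed CP_ME_TABLE_OFFSET/CP_MEC_TABLE_OFFSET
 * and CP_ME_TABLE_SIZE layout defined above.
 */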
6590 
6591 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6592 				bool enable)
6593 {
6594 	u32 data, orig;
6595 
6596 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6597 		orig = data = RREG32(RLC_PG_CNTL);
6598 		data |= GFX_PG_ENABLE;
6599 		if (orig != data)
6600 			WREG32(RLC_PG_CNTL, data);
6601 
6602 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6603 		data |= AUTO_PG_EN;
6604 		if (orig != data)
6605 			WREG32(RLC_AUTO_PG_CTRL, data);
6606 	} else {
6607 		orig = data = RREG32(RLC_PG_CNTL);
6608 		data &= ~GFX_PG_ENABLE;
6609 		if (orig != data)
6610 			WREG32(RLC_PG_CNTL, data);
6611 
6612 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6613 		data &= ~AUTO_PG_EN;
6614 		if (orig != data)
6615 			WREG32(RLC_AUTO_PG_CTRL, data);
6616 
6617 		data = RREG32(DB_RENDER_CONTROL);
6618 	}
6619 }
6620 
6621 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6622 {
6623 	u32 mask = 0, tmp, tmp1;
6624 	int i;
6625 
6626 	mutex_lock(&rdev->grbm_idx_mutex);
6627 	cik_select_se_sh(rdev, se, sh);
6628 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6629 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6630 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6631 	mutex_unlock(&rdev->grbm_idx_mutex);
6632 
6633 	tmp &= 0xffff0000;
6634 
6635 	tmp |= tmp1;
6636 	tmp >>= 16;
6637 
6638 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6639 		mask <<= 1;
6640 		mask |= 1;
6641 	}
6642 
6643 	return (~tmp) & mask;
6644 }
6645 
6646 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6647 {
6648 	u32 i, j, k, active_cu_number = 0;
6649 	u32 mask, counter, cu_bitmap;
6650 	u32 tmp = 0;
6651 
6652 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6653 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6654 			mask = 1;
6655 			cu_bitmap = 0;
6656 			counter = 0;
6657 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6658 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6659 					if (counter < 2)
6660 						cu_bitmap |= mask;
6661 					counter++;
6662 				}
6663 				mask <<= 1;
6664 			}
6665 
6666 			active_cu_number += counter;
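			/* pack one 8-bit per-SH bitmap per slot:
			 * SE0/SH0 -> bits [7:0], SE0/SH1 -> bits [15:8],
			 * SE1/SH0 -> bits [23:16], and so on.
			 */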
6667 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6668 		}
6669 	}
6670 
6671 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6672 
6673 	tmp = RREG32(RLC_MAX_PG_CU);
6674 	tmp &= ~MAX_PU_CU_MASK;
6675 	tmp |= MAX_PU_CU(active_cu_number);
6676 	WREG32(RLC_MAX_PG_CU, tmp);
6677 }
6678 
6679 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6680 				       bool enable)
6681 {
6682 	u32 data, orig;
6683 
6684 	orig = data = RREG32(RLC_PG_CNTL);
6685 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6686 		data |= STATIC_PER_CU_PG_ENABLE;
6687 	else
6688 		data &= ~STATIC_PER_CU_PG_ENABLE;
6689 	if (orig != data)
6690 		WREG32(RLC_PG_CNTL, data);
6691 }
6692 
6693 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6694 					bool enable)
6695 {
6696 	u32 data, orig;
6697 
6698 	orig = data = RREG32(RLC_PG_CNTL);
6699 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6700 		data |= DYN_PER_CU_PG_ENABLE;
6701 	else
6702 		data &= ~DYN_PER_CU_PG_ENABLE;
6703 	if (orig != data)
6704 		WREG32(RLC_PG_CNTL, data);
6705 }
6706 
6707 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6708 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6709 
6710 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6711 {
6712 	u32 data, orig;
6713 	u32 i;
6714 
6715 	if (rdev->rlc.cs_data) {
6716 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6717 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6718 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6719 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6720 	} else {
6721 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6722 		for (i = 0; i < 3; i++)
6723 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6724 	}
6725 	if (rdev->rlc.reg_list) {
6726 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6727 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6728 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6729 	}
6730 
6731 	orig = data = RREG32(RLC_PG_CNTL);
6732 	data |= GFX_PG_SRC;
6733 	if (orig != data)
6734 		WREG32(RLC_PG_CNTL, data);
6735 
6736 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6737 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6738 
6739 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6740 	data &= ~IDLE_POLL_COUNT_MASK;
6741 	data |= IDLE_POLL_COUNT(0x60);
6742 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6743 
6744 	data = 0x10101010;
6745 	WREG32(RLC_PG_DELAY, data);
6746 
6747 	data = RREG32(RLC_PG_DELAY_2);
6748 	data &= ~0xff;
6749 	data |= 0x3;
6750 	WREG32(RLC_PG_DELAY_2, data);
6751 
6752 	data = RREG32(RLC_AUTO_PG_CTRL);
6753 	data &= ~GRBM_REG_SGIT_MASK;
6754 	data |= GRBM_REG_SGIT(0x700);
6755 	WREG32(RLC_AUTO_PG_CTRL, data);
6756 
6757 }
6758 
6759 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6760 {
6761 	cik_enable_gfx_cgpg(rdev, enable);
6762 	cik_enable_gfx_static_mgpg(rdev, enable);
6763 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6764 }
6765 
6766 u32 cik_get_csb_size(struct radeon_device *rdev)
6767 {
6768 	u32 count = 0;
6769 	const struct cs_section_def *sect = NULL;
6770 	const struct cs_extent_def *ext = NULL;
6771 
6772 	if (rdev->rlc.cs_data == NULL)
6773 		return 0;
6774 
6775 	/* begin clear state */
6776 	count += 2;
6777 	/* context control state */
6778 	count += 3;
6779 
6780 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6781 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6782 			if (sect->id == SECT_CONTEXT)
6783 				count += 2 + ext->reg_count;
6784 			else
6785 				return 0;
6786 		}
6787 	}
6788 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6789 	count += 4;
6790 	/* end clear state */
6791 	count += 2;
6792 	/* clear state */
6793 	count += 2;
6794 
6795 	return count;
6796 }
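
/*
 * Worked example for the count above (hypothetical numbers): a single
 * SECT_CONTEXT section with one extent of 8 registers gives
 * 2 + 3 + (2 + 8) + 4 + 2 + 2 = 23 dwords.
 */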
6797 
6798 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6799 {
6800 	u32 count = 0, i;
6801 	const struct cs_section_def *sect = NULL;
6802 	const struct cs_extent_def *ext = NULL;
6803 
6804 	if (rdev->rlc.cs_data == NULL)
6805 		return;
6806 	if (buffer == NULL)
6807 		return;
6808 
6809 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6810 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6811 
6812 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6813 	buffer[count++] = cpu_to_le32(0x80000000);
6814 	buffer[count++] = cpu_to_le32(0x80000000);
6815 
6816 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6817 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6818 			if (sect->id == SECT_CONTEXT) {
6819 				buffer[count++] =
6820 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6821 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6822 				for (i = 0; i < ext->reg_count; i++)
6823 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6824 			} else {
6825 				return;
6826 			}
6827 		}
6828 	}
6829 
6830 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6831 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6832 	switch (rdev->family) {
6833 	case CHIP_BONAIRE:
6834 		buffer[count++] = cpu_to_le32(0x16000012);
6835 		buffer[count++] = cpu_to_le32(0x00000000);
6836 		break;
6837 	case CHIP_KAVERI:
6838 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6839 		buffer[count++] = cpu_to_le32(0x00000000);
6840 		break;
6841 	case CHIP_KABINI:
6842 	case CHIP_MULLINS:
6843 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6844 		buffer[count++] = cpu_to_le32(0x00000000);
6845 		break;
6846 	case CHIP_HAWAII:
6847 		buffer[count++] = cpu_to_le32(0x3a00161a);
6848 		buffer[count++] = cpu_to_le32(0x0000002e);
6849 		break;
6850 	default:
6851 		buffer[count++] = cpu_to_le32(0x00000000);
6852 		buffer[count++] = cpu_to_le32(0x00000000);
6853 		break;
6854 	}
6855 
6856 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6857 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6858 
6859 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6860 	buffer[count++] = cpu_to_le32(0);
6861 }
6862 
6863 static void cik_init_pg(struct radeon_device *rdev)
6864 {
6865 	if (rdev->pg_flags) {
6866 		cik_enable_sck_slowdown_on_pu(rdev, true);
6867 		cik_enable_sck_slowdown_on_pd(rdev, true);
6868 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6869 			cik_init_gfx_cgpg(rdev);
6870 			cik_enable_cp_pg(rdev, true);
6871 			cik_enable_gds_pg(rdev, true);
6872 		}
6873 		cik_init_ao_cu_mask(rdev);
6874 		cik_update_gfx_pg(rdev, true);
6875 	}
6876 }
6877 
6878 static void cik_fini_pg(struct radeon_device *rdev)
6879 {
6880 	if (rdev->pg_flags) {
6881 		cik_update_gfx_pg(rdev, false);
6882 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6883 			cik_enable_cp_pg(rdev, false);
6884 			cik_enable_gds_pg(rdev, false);
6885 		}
6886 	}
6887 }
6888 
6889 /*
6890  * Interrupts
6891  * Starting with r6xx, interrupts are handled via a ring buffer.
6892  * Ring buffers are areas of GPU accessible memory that the GPU
6893  * writes interrupt vectors into and the host reads vectors out of.
6894  * There is a rptr (read pointer) that determines where the
6895  * host is currently reading, and a wptr (write pointer)
6896  * which determines where the GPU has written.  When the
6897  * pointers are equal, the ring is idle.  When the GPU
6898  * writes vectors to the ring buffer, it increments the
6899  * wptr.  When there is an interrupt, the host then starts
6900  * fetching vectors and processing them until the pointers are
6901  * equal again, at which point it updates the rptr.
6902  */
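
/*
 * Illustrative sketch only, not driver code: the rptr/wptr protocol
 * described above reduced to a minimal consumer loop.  The handler name
 * is hypothetical; IH entries are 16 bytes each, matching the
 * "rptr += 16" step in cik_irq_process() below.
 */
#if 0
static void example_drain_ih_ring(struct radeon_device *rdev, u32 wptr)
{
	u32 rptr = rdev->ih.rptr;

	while (rptr != wptr) {
		/* fetch one vector; rptr/wptr are byte offsets */
		u32 src_id = le32_to_cpu(rdev->ih.ring[rptr / 4]) & 0xff;

		example_handle_vector(rdev, src_id);	/* hypothetical */
		rptr = (rptr + 16) & rdev->ih.ptr_mask;
	}
	rdev->ih.rptr = rptr;	/* rptr == wptr again: ring is idle */
}
#endif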
6903 
6904 /**
6905  * cik_enable_interrupts - Enable the interrupt ring buffer
6906  *
6907  * @rdev: radeon_device pointer
6908  *
6909  * Enable the interrupt ring buffer (CIK).
6910  */
6911 static void cik_enable_interrupts(struct radeon_device *rdev)
6912 {
6913 	u32 ih_cntl = RREG32(IH_CNTL);
6914 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6915 
6916 	ih_cntl |= ENABLE_INTR;
6917 	ih_rb_cntl |= IH_RB_ENABLE;
6918 	WREG32(IH_CNTL, ih_cntl);
6919 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6920 	rdev->ih.enabled = true;
6921 }
6922 
6923 /**
6924  * cik_disable_interrupts - Disable the interrupt ring buffer
6925  *
6926  * @rdev: radeon_device pointer
6927  *
6928  * Disable the interrupt ring buffer (CIK).
6929  */
6930 static void cik_disable_interrupts(struct radeon_device *rdev)
6931 {
6932 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6933 	u32 ih_cntl = RREG32(IH_CNTL);
6934 
6935 	ih_rb_cntl &= ~IH_RB_ENABLE;
6936 	ih_cntl &= ~ENABLE_INTR;
6937 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6938 	WREG32(IH_CNTL, ih_cntl);
6939 	/* set rptr, wptr to 0 */
6940 	WREG32(IH_RB_RPTR, 0);
6941 	WREG32(IH_RB_WPTR, 0);
6942 	rdev->ih.enabled = false;
6943 	rdev->ih.rptr = 0;
6944 }
6945 
6946 /**
6947  * cik_disable_interrupt_state - Disable all interrupt sources
6948  *
6949  * @rdev: radeon_device pointer
6950  *
6951  * Clear all interrupt enable bits used by the driver (CIK).
6952  */
6953 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6954 {
6955 	u32 tmp;
6956 
6957 	/* gfx ring */
6958 	tmp = RREG32(CP_INT_CNTL_RING0) &
6959 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6960 	WREG32(CP_INT_CNTL_RING0, tmp);
6961 	/* sdma */
6962 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6963 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6964 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6965 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6966 	/* compute queues */
6967 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6968 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6969 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6970 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6971 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6972 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6973 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6974 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6975 	/* grbm */
6976 	WREG32(GRBM_INT_CNTL, 0);
6977 	/* SRBM */
6978 	WREG32(SRBM_INT_CNTL, 0);
6979 	/* vline/vblank, etc. */
6980 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6981 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6982 	if (rdev->num_crtc >= 4) {
6983 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6984 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6985 	}
6986 	if (rdev->num_crtc >= 6) {
6987 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6988 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6989 	}
6990 	/* pflip */
6991 	if (rdev->num_crtc >= 2) {
6992 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6993 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6994 	}
6995 	if (rdev->num_crtc >= 4) {
6996 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6997 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6998 	}
6999 	if (rdev->num_crtc >= 6) {
7000 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7001 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7002 	}
7003 
7004 	/* dac hotplug */
7005 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7006 
7007 	/* digital hotplug */
7008 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7009 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7010 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7011 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7012 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7013 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7014 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7015 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7016 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7017 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7018 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7019 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7020 
7021 }
7022 
7023 /**
7024  * cik_irq_init - init and enable the interrupt ring
7025  *
7026  * @rdev: radeon_device pointer
7027  *
7028  * Allocate a ring buffer for the interrupt controller,
7029  * enable the RLC, disable interrupts, enable the IH
7030  * ring buffer and enable it (CIK).
7031  * Called at device load and resume.
7032  * Returns 0 for success, errors for failure.
7033  */
7034 static int cik_irq_init(struct radeon_device *rdev)
7035 {
7036 	int ret = 0;
7037 	int rb_bufsz;
7038 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7039 
7040 	/* allocate ring */
7041 	ret = r600_ih_ring_alloc(rdev);
7042 	if (ret)
7043 		return ret;
7044 
7045 	/* disable irqs */
7046 	cik_disable_interrupts(rdev);
7047 
7048 	/* init rlc */
7049 	ret = cik_rlc_resume(rdev);
7050 	if (ret) {
7051 		r600_ih_ring_fini(rdev);
7052 		return ret;
7053 	}
7054 
7055 	/* setup interrupt control */
7056 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7057 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7058 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7059 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7060 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7061 	 */
7062 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7063 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7064 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7065 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7066 
7067 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7068 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
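	/* e.g. a 64KB IH ring gives order_base_2(65536 / 4) = 14 */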
7069 
7070 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7071 		      IH_WPTR_OVERFLOW_CLEAR |
7072 		      (rb_bufsz << 1));
7073 
7074 	if (rdev->wb.enabled)
7075 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7076 
7077 	/* set the writeback address whether it's enabled or not */
7078 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7079 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7080 
7081 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7082 
7083 	/* set rptr, wptr to 0 */
7084 	WREG32(IH_RB_RPTR, 0);
7085 	WREG32(IH_RB_WPTR, 0);
7086 
7087 	/* Default settings for IH_CNTL (disabled at first) */
7088 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7089 	/* RPTR_REARM only works if msi's are enabled */
7090 	if (rdev->msi_enabled)
7091 		ih_cntl |= RPTR_REARM;
7092 	WREG32(IH_CNTL, ih_cntl);
7093 
7094 	/* force the active interrupt state to all disabled */
7095 	cik_disable_interrupt_state(rdev);
7096 
7097 	pci_set_master(rdev->pdev);
7098 
7099 	/* enable irqs */
7100 	cik_enable_interrupts(rdev);
7101 
7102 	return ret;
7103 }
7104 
7105 /**
7106  * cik_irq_set - enable/disable interrupt sources
7107  *
7108  * @rdev: radeon_device pointer
7109  *
7110  * Enable interrupt sources on the GPU (vblanks, hpd,
7111  * etc.) (CIK).
7112  * Returns 0 for success, errors for failure.
7113  */
7114 int cik_irq_set(struct radeon_device *rdev)
7115 {
7116 	u32 cp_int_cntl;
7117 	u32 cp_m1p0;
7118 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7119 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7120 	u32 grbm_int_cntl = 0;
7121 	u32 dma_cntl, dma_cntl1;
7122 
7123 	if (!rdev->irq.installed) {
7124 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7125 		return -EINVAL;
7126 	}
7127 	/* don't enable anything if the ih is disabled */
7128 	if (!rdev->ih.enabled) {
7129 		cik_disable_interrupts(rdev);
7130 		/* force the active interrupt state to all disabled */
7131 		cik_disable_interrupt_state(rdev);
7132 		return 0;
7133 	}
7134 
7135 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7136 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7137 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7138 
7139 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7140 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7141 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7142 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7143 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7144 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7145 
7146 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7147 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7148 
7149 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7150 
7151 	/* enable CP interrupts on all rings */
7152 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7153 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7154 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7155 	}
7156 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7157 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7158 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7159 		if (ring->me == 1) {
7160 			switch (ring->pipe) {
7161 			case 0:
7162 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7163 				break;
7164 			default:
7165 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7166 				break;
7167 			}
7168 		} else {
7169 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7170 		}
7171 	}
7172 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7173 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7174 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7175 		if (ring->me == 1) {
7176 			switch (ring->pipe) {
7177 			case 0:
7178 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7179 				break;
7180 			default:
7181 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7182 				break;
7183 			}
7184 		} else {
7185 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7186 		}
7187 	}
7188 
7189 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7190 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7191 		dma_cntl |= TRAP_ENABLE;
7192 	}
7193 
7194 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7195 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7196 		dma_cntl1 |= TRAP_ENABLE;
7197 	}
7198 
7199 	if (rdev->irq.crtc_vblank_int[0] ||
7200 	    atomic_read(&rdev->irq.pflip[0])) {
7201 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7202 		crtc1 |= VBLANK_INTERRUPT_MASK;
7203 	}
7204 	if (rdev->irq.crtc_vblank_int[1] ||
7205 	    atomic_read(&rdev->irq.pflip[1])) {
7206 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7207 		crtc2 |= VBLANK_INTERRUPT_MASK;
7208 	}
7209 	if (rdev->irq.crtc_vblank_int[2] ||
7210 	    atomic_read(&rdev->irq.pflip[2])) {
7211 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7212 		crtc3 |= VBLANK_INTERRUPT_MASK;
7213 	}
7214 	if (rdev->irq.crtc_vblank_int[3] ||
7215 	    atomic_read(&rdev->irq.pflip[3])) {
7216 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7217 		crtc4 |= VBLANK_INTERRUPT_MASK;
7218 	}
7219 	if (rdev->irq.crtc_vblank_int[4] ||
7220 	    atomic_read(&rdev->irq.pflip[4])) {
7221 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7222 		crtc5 |= VBLANK_INTERRUPT_MASK;
7223 	}
7224 	if (rdev->irq.crtc_vblank_int[5] ||
7225 	    atomic_read(&rdev->irq.pflip[5])) {
7226 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7227 		crtc6 |= VBLANK_INTERRUPT_MASK;
7228 	}
7229 	if (rdev->irq.hpd[0]) {
7230 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7231 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7232 	}
7233 	if (rdev->irq.hpd[1]) {
7234 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7235 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7236 	}
7237 	if (rdev->irq.hpd[2]) {
7238 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7239 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7240 	}
7241 	if (rdev->irq.hpd[3]) {
7242 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7243 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7244 	}
7245 	if (rdev->irq.hpd[4]) {
7246 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7247 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7248 	}
7249 	if (rdev->irq.hpd[5]) {
7250 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7251 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7252 	}
7253 
7254 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7255 
7256 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7257 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7258 
7259 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7260 
7261 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7262 
7263 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7264 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7265 	if (rdev->num_crtc >= 4) {
7266 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7267 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7268 	}
7269 	if (rdev->num_crtc >= 6) {
7270 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7271 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7272 	}
7273 
7274 	if (rdev->num_crtc >= 2) {
7275 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7276 		       GRPH_PFLIP_INT_MASK);
7277 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7278 		       GRPH_PFLIP_INT_MASK);
7279 	}
7280 	if (rdev->num_crtc >= 4) {
7281 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7282 		       GRPH_PFLIP_INT_MASK);
7283 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7284 		       GRPH_PFLIP_INT_MASK);
7285 	}
7286 	if (rdev->num_crtc >= 6) {
7287 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7288 		       GRPH_PFLIP_INT_MASK);
7289 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7290 		       GRPH_PFLIP_INT_MASK);
7291 	}
7292 
7293 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7294 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7295 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7296 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7297 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7298 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7299 
7300 	/* posting read */
7301 	RREG32(SRBM_STATUS);
7302 
7303 	return 0;
7304 }
7305 
7306 /**
7307  * cik_irq_ack - ack interrupt sources
7308  *
7309  * @rdev: radeon_device pointer
7310  *
7311  * Ack interrupt sources on the GPU (vblanks, hpd,
7312  * etc.) (CIK).  Certain interrupt sources are sw
7313  * generated and do not require an explicit ack.
7314  */
7315 static inline void cik_irq_ack(struct radeon_device *rdev)
7316 {
7317 	u32 tmp;
7318 
7319 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7320 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7321 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7322 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7323 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7324 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7325 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7326 
7327 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7328 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7329 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7330 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7331 	if (rdev->num_crtc >= 4) {
7332 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7333 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7334 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7335 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7336 	}
7337 	if (rdev->num_crtc >= 6) {
7338 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7339 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7340 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7341 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7342 	}
7343 
7344 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7345 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7346 		       GRPH_PFLIP_INT_CLEAR);
7347 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7348 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7349 		       GRPH_PFLIP_INT_CLEAR);
7350 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7351 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7352 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7353 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7354 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7355 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7356 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7357 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7358 
7359 	if (rdev->num_crtc >= 4) {
7360 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7361 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7362 			       GRPH_PFLIP_INT_CLEAR);
7363 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7364 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7365 			       GRPH_PFLIP_INT_CLEAR);
7366 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7367 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7368 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7369 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7370 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7371 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7372 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7373 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7374 	}
7375 
7376 	if (rdev->num_crtc >= 6) {
7377 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7378 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7379 			       GRPH_PFLIP_INT_CLEAR);
7380 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7381 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7382 			       GRPH_PFLIP_INT_CLEAR);
7383 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7384 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7385 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7386 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7387 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7388 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7389 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7390 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7391 	}
7392 
7393 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7394 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7395 		tmp |= DC_HPDx_INT_ACK;
7396 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7397 	}
7398 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7399 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7400 		tmp |= DC_HPDx_INT_ACK;
7401 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7402 	}
7403 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7404 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7405 		tmp |= DC_HPDx_INT_ACK;
7406 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7407 	}
7408 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7409 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7410 		tmp |= DC_HPDx_INT_ACK;
7411 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7412 	}
7413 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7414 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7415 		tmp |= DC_HPDx_INT_ACK;
7416 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7417 	}
7418 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7419 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7420 		tmp |= DC_HPDx_INT_ACK;
7421 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7422 	}
7423 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7424 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7425 		tmp |= DC_HPDx_RX_INT_ACK;
7426 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7427 	}
7428 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7429 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7430 		tmp |= DC_HPDx_RX_INT_ACK;
7431 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7432 	}
7433 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7434 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7435 		tmp |= DC_HPDx_RX_INT_ACK;
7436 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7437 	}
7438 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7439 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7440 		tmp |= DC_HPDx_RX_INT_ACK;
7441 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7442 	}
7443 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7444 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7445 		tmp |= DC_HPDx_RX_INT_ACK;
7446 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7447 	}
7448 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7449 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7450 		tmp |= DC_HPDx_RX_INT_ACK;
7451 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7452 	}
7453 }
7454 
7455 /**
7456  * cik_irq_disable - disable interrupts
7457  *
7458  * @rdev: radeon_device pointer
7459  *
7460  * Disable interrupts on the hw (CIK).
7461  */
7462 static void cik_irq_disable(struct radeon_device *rdev)
7463 {
7464 	cik_disable_interrupts(rdev);
7465 	/* Wait and acknowledge irq */
7466 	mdelay(1);
7467 	cik_irq_ack(rdev);
7468 	cik_disable_interrupt_state(rdev);
7469 }
7470 
7471 /**
7472  * cik_irq_suspend - disable interrupts for suspend
7473  *
7474  * @rdev: radeon_device pointer
7475  *
7476  * Disable interrupts and stop the RLC (CIK).
7477  * Used for suspend.
7478  */
7479 static void cik_irq_suspend(struct radeon_device *rdev)
7480 {
7481 	cik_irq_disable(rdev);
7482 	cik_rlc_stop(rdev);
7483 }
7484 
7485 /**
7486  * cik_irq_fini - tear down interrupt support
7487  *
7488  * @rdev: radeon_device pointer
7489  *
7490  * Disable interrupts on the hw and free the IH ring
7491  * buffer (CIK).
7492  * Used for driver unload.
7493  */
7494 static void cik_irq_fini(struct radeon_device *rdev)
7495 {
7496 	cik_irq_suspend(rdev);
7497 	r600_ih_ring_fini(rdev);
7498 }
7499 
7500 /**
7501  * cik_get_ih_wptr - get the IH ring buffer wptr
7502  *
7503  * @rdev: radeon_device pointer
7504  *
7505  * Get the IH ring buffer wptr from either the register
7506  * or the writeback memory buffer (CIK).  Also check for
7507  * ring buffer overflow and deal with it.
7508  * Used by cik_irq_process().
7509  * Returns the value of the wptr.
7510  */
7511 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7512 {
7513 	u32 wptr, tmp;
7514 
7515 	if (rdev->wb.enabled)
7516 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7517 	else
7518 		wptr = RREG32(IH_RB_WPTR);
7519 
7520 	if (wptr & RB_OVERFLOW) {
7521 		wptr &= ~RB_OVERFLOW;
7522 		/* When a ring buffer overflow happens, start parsing interrupts
7523 		 * from the last not-overwritten vector (wptr + 16). Hopefully
7524 		 * this will allow us to catch up.
7525 		 */
7526 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7527 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7528 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7529 		tmp = RREG32(IH_RB_CNTL);
7530 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7531 		WREG32(IH_RB_CNTL, tmp);
7532 	}
7533 	return (wptr & rdev->ih.ptr_mask);
7534 }
7535 
7536 /* CIK IV Ring
7537  * Each IV ring entry is 128 bits:
7538  * [7:0]    - interrupt source id
7539  * [31:8]   - reserved
7540  * [59:32]  - interrupt source data
7541  * [63:60]  - reserved
7542  * [71:64]  - RINGID
7543  *            CP:
7544  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7545  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7546  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7547  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7548  *            PIPE_ID - ME0 0=3D
7549  *                    - ME1&2 compute dispatcher (4 pipes each)
7550  *            SDMA:
7551  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7552  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7553  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7554  * [79:72]  - VMID
7555  * [95:80]  - PASID
7556  * [127:96] - reserved
7557  */
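/*
 * Illustrative sketch only, not driver code: unpacking one 128-bit IV
 * entry from its four little-endian dwords per the layout above.  The
 * struct and helper are hypothetical.
 */
#if 0
struct example_iv_entry {
	u8  src_id;	/* [7:0]            */
	u32 src_data;	/* [59:32], 28 bits */
	u8  ring_id;	/* [71:64]          */
	u8  vmid;	/* [79:72]          */
	u16 pasid;	/* [95:80]          */
};

static void example_decode_iv(const __le32 dw[4], struct example_iv_entry *e)
{
	e->src_id   = le32_to_cpu(dw[0]) & 0xff;
	e->src_data = le32_to_cpu(dw[1]) & 0xfffffff;
	e->ring_id  = le32_to_cpu(dw[2]) & 0xff;
	e->vmid     = (le32_to_cpu(dw[2]) >> 8) & 0xff;
	e->pasid    = le32_to_cpu(dw[2]) >> 16;
	/* dw[3], bits [127:96], is reserved */
}
#endif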
7558 /**
7559  * cik_irq_process - interrupt handler
7560  *
7561  * @rdev: radeon_device pointer
7562  *
7563  * Interrupt handler (CIK).  Walk the IH ring,
7564  * ack interrupts and schedule work to handle
7565  * interrupt events.
7566  * Returns irq process return code.
7567  */
7568 int cik_irq_process(struct radeon_device *rdev)
7569 {
7570 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7571 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7572 	u32 wptr;
7573 	u32 rptr;
7574 	u32 src_id, src_data, ring_id;
7575 	u8 me_id, pipe_id, queue_id;
7576 	u32 ring_index;
7577 	bool queue_hotplug = false;
7578 	bool queue_dp = false;
7579 	bool queue_reset = false;
7580 	u32 addr, status, mc_client;
7581 	bool queue_thermal = false;
7582 
7583 	if (!rdev->ih.enabled || rdev->shutdown)
7584 		return IRQ_NONE;
7585 
7586 	wptr = cik_get_ih_wptr(rdev);
7587 
7588 restart_ih:
7589 	/* is somebody else already processing irqs? */
7590 	if (atomic_xchg(&rdev->ih.lock, 1))
7591 		return IRQ_NONE;
7592 
7593 	rptr = rdev->ih.rptr;
7594 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7595 
7596 	/* Order reading of wptr vs. reading of IH ring data */
7597 	rmb();
7598 
7599 	/* display interrupts */
7600 	cik_irq_ack(rdev);
7601 
7602 	while (rptr != wptr) {
7603 		/* wptr/rptr are in bytes! */
7604 		ring_index = rptr / 4;
7605 
7606 		radeon_kfd_interrupt(rdev,
7607 				(const void *) &rdev->ih.ring[ring_index]);
7608 
7609 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7610 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7611 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7612 
7613 		switch (src_id) {
7614 		case 1: /* D1 vblank/vline */
7615 			switch (src_data) {
7616 			case 0: /* D1 vblank */
7617 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7618 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7619 
7620 				if (rdev->irq.crtc_vblank_int[0]) {
7621 					drm_handle_vblank(rdev->ddev, 0);
7622 					rdev->pm.vblank_sync = true;
7623 					wake_up(&rdev->irq.vblank_queue);
7624 				}
7625 				if (atomic_read(&rdev->irq.pflip[0]))
7626 					radeon_crtc_handle_vblank(rdev, 0);
7627 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7628 				DRM_DEBUG("IH: D1 vblank\n");
7629 
7630 				break;
7631 			case 1: /* D1 vline */
7632 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7633 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7634 
7635 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7636 				DRM_DEBUG("IH: D1 vline\n");
7637 
7638 				break;
7639 			default:
7640 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7641 				break;
7642 			}
7643 			break;
7644 		case 2: /* D2 vblank/vline */
7645 			switch (src_data) {
7646 			case 0: /* D2 vblank */
7647 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7648 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7649 
7650 				if (rdev->irq.crtc_vblank_int[1]) {
7651 					drm_handle_vblank(rdev->ddev, 1);
7652 					rdev->pm.vblank_sync = true;
7653 					wake_up(&rdev->irq.vblank_queue);
7654 				}
7655 				if (atomic_read(&rdev->irq.pflip[1]))
7656 					radeon_crtc_handle_vblank(rdev, 1);
7657 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7658 				DRM_DEBUG("IH: D2 vblank\n");
7659 
7660 				break;
7661 			case 1: /* D2 vline */
7662 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7663 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7664 
7665 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7666 				DRM_DEBUG("IH: D2 vline\n");
7667 
7668 				break;
7669 			default:
7670 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7671 				break;
7672 			}
7673 			break;
7674 		case 3: /* D3 vblank/vline */
7675 			switch (src_data) {
7676 			case 0: /* D3 vblank */
7677 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7678 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7679 
7680 				if (rdev->irq.crtc_vblank_int[2]) {
7681 					drm_handle_vblank(rdev->ddev, 2);
7682 					rdev->pm.vblank_sync = true;
7683 					wake_up(&rdev->irq.vblank_queue);
7684 				}
7685 				if (atomic_read(&rdev->irq.pflip[2]))
7686 					radeon_crtc_handle_vblank(rdev, 2);
7687 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7688 				DRM_DEBUG("IH: D3 vblank\n");
7689 
7690 				break;
7691 			case 1: /* D3 vline */
7692 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7693 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7694 
7695 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7696 				DRM_DEBUG("IH: D3 vline\n");
7697 
7698 				break;
7699 			default:
7700 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7701 				break;
7702 			}
7703 			break;
7704 		case 4: /* D4 vblank/vline */
7705 			switch (src_data) {
7706 			case 0: /* D4 vblank */
7707 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7708 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7709 
7710 				if (rdev->irq.crtc_vblank_int[3]) {
7711 					drm_handle_vblank(rdev->ddev, 3);
7712 					rdev->pm.vblank_sync = true;
7713 					wake_up(&rdev->irq.vblank_queue);
7714 				}
7715 				if (atomic_read(&rdev->irq.pflip[3]))
7716 					radeon_crtc_handle_vblank(rdev, 3);
7717 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7718 				DRM_DEBUG("IH: D4 vblank\n");
7719 
7720 				break;
7721 			case 1: /* D4 vline */
7722 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7723 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7724 
7725 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7726 				DRM_DEBUG("IH: D4 vline\n");
7727 
7728 				break;
7729 			default:
7730 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7731 				break;
7732 			}
7733 			break;
7734 		case 5: /* D5 vblank/vline */
7735 			switch (src_data) {
7736 			case 0: /* D5 vblank */
7737 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7738 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7739 
7740 				if (rdev->irq.crtc_vblank_int[4]) {
7741 					drm_handle_vblank(rdev->ddev, 4);
7742 					rdev->pm.vblank_sync = true;
7743 					wake_up(&rdev->irq.vblank_queue);
7744 				}
7745 				if (atomic_read(&rdev->irq.pflip[4]))
7746 					radeon_crtc_handle_vblank(rdev, 4);
7747 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7748 				DRM_DEBUG("IH: D5 vblank\n");
7749 
7750 				break;
7751 			case 1: /* D5 vline */
7752 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7753 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7754 
7755 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7756 				DRM_DEBUG("IH: D5 vline\n");
7757 
7758 				break;
7759 			default:
7760 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7761 				break;
7762 			}
7763 			break;
7764 		case 6: /* D6 vblank/vline */
7765 			switch (src_data) {
7766 			case 0: /* D6 vblank */
7767 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7768 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7769 
7770 				if (rdev->irq.crtc_vblank_int[5]) {
7771 					drm_handle_vblank(rdev->ddev, 5);
7772 					rdev->pm.vblank_sync = true;
7773 					wake_up(&rdev->irq.vblank_queue);
7774 				}
7775 				if (atomic_read(&rdev->irq.pflip[5]))
7776 					radeon_crtc_handle_vblank(rdev, 5);
7777 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7778 				DRM_DEBUG("IH: D6 vblank\n");
7779 
7780 				break;
7781 			case 1: /* D6 vline */
7782 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7783 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7784 
7785 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7786 				DRM_DEBUG("IH: D6 vline\n");
7787 
7788 				break;
7789 			default:
7790 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7791 				break;
7792 			}
7793 			break;
7794 		case 8: /* D1 page flip */
7795 		case 10: /* D2 page flip */
7796 		case 12: /* D3 page flip */
7797 		case 14: /* D4 page flip */
7798 		case 16: /* D5 page flip */
7799 		case 18: /* D6 page flip */
7800 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7801 			if (radeon_use_pflipirq > 0)
7802 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7803 			break;
7804 		case 42: /* HPD hotplug */
7805 			switch (src_data) {
7806 			case 0:
7807 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7808 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809 
7810 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7811 				queue_hotplug = true;
7812 				DRM_DEBUG("IH: HPD1\n");
7813 
7814 				break;
7815 			case 1:
7816 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7817 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818 
7819 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7820 				queue_hotplug = true;
7821 				DRM_DEBUG("IH: HPD2\n");
7822 
7823 				break;
7824 			case 2:
7825 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7826 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827 
7828 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7829 				queue_hotplug = true;
7830 				DRM_DEBUG("IH: HPD3\n");
7831 
7832 				break;
7833 			case 3:
7834 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7835 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836 
7837 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7838 				queue_hotplug = true;
7839 				DRM_DEBUG("IH: HPD4\n");
7840 
7841 				break;
7842 			case 4:
7843 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7844 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845 
7846 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7847 				queue_hotplug = true;
7848 				DRM_DEBUG("IH: HPD5\n");
7849 
7850 				break;
7851 			case 5:
7852 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7853 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854 
7855 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7856 				queue_hotplug = true;
7857 				DRM_DEBUG("IH: HPD6\n");
7858 
7859 				break;
7860 			case 6:
7861 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7862 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7863 
7864 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7865 				queue_dp = true;
7866 				DRM_DEBUG("IH: HPD_RX 1\n");
7867 
7868 				break;
7869 			case 7:
7870 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7871 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7872 
7873 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7874 				queue_dp = true;
7875 				DRM_DEBUG("IH: HPD_RX 2\n");
7876 
7877 				break;
7878 			case 8:
7879 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7880 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7881 
7882 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7883 				queue_dp = true;
7884 				DRM_DEBUG("IH: HPD_RX 3\n");
7885 
7886 				break;
7887 			case 9:
7888 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7889 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7890 
7891 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7892 				queue_dp = true;
7893 				DRM_DEBUG("IH: HPD_RX 4\n");
7894 
7895 				break;
7896 			case 10:
7897 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7898 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7899 
7900 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7901 				queue_dp = true;
7902 				DRM_DEBUG("IH: HPD_RX 5\n");
7903 
7904 				break;
7905 			case 11:
7906 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7907 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7908 
7909 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7910 				queue_dp = true;
7911 				DRM_DEBUG("IH: HPD_RX 6\n");
7912 
7913 				break;
7914 			default:
7915 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7916 				break;
7917 			}
7918 			break;
7919 		case 96:
7920 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7921 			WREG32(SRBM_INT_ACK, 0x1);
7922 			break;
7923 		case 124: /* UVD */
7924 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7925 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7926 			break;
7927 		case 146:
7928 		case 147:
7929 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7930 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7931 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7932 			/* reset addr and status */
7933 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7934 			if (addr == 0x0 && status == 0x0)
7935 				break;
7936 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7937 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7938 				addr);
7939 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7940 				status);
7941 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7942 			break;
7943 		case 167: /* VCE */
7944 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7945 			switch (src_data) {
7946 			case 0:
7947 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7948 				break;
7949 			case 1:
7950 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7951 				break;
7952 			default:
7953 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7954 				break;
7955 			}
7956 			break;
7957 		case 176: /* GFX RB CP_INT */
7958 		case 177: /* GFX IB CP_INT */
7959 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7960 			break;
7961 		case 181: /* CP EOP event */
7962 			DRM_DEBUG("IH: CP EOP\n");
7963 			/* XXX check the bitfield order! */
7964 			me_id = (ring_id & 0x60) >> 5;
7965 			pipe_id = (ring_id & 0x18) >> 3;
7966 			queue_id = (ring_id & 0x7) >> 0;
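			/* e.g. ring_id 0x29 decodes to me 1, pipe 1, queue 1 */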
7967 			switch (me_id) {
7968 			case 0:
7969 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7970 				break;
7971 			case 1:
7972 			case 2:
7973 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7974 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7975 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7976 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7977 				break;
7978 			}
7979 			break;
7980 		case 184: /* CP Privileged reg access */
7981 			DRM_ERROR("Illegal register access in command stream\n");
7982 			/* XXX check the bitfield order! */
7983 			me_id = (ring_id & 0x60) >> 5;
7984 			pipe_id = (ring_id & 0x18) >> 3;
7985 			queue_id = (ring_id & 0x7) >> 0;
7986 			switch (me_id) {
7987 			case 0:
7988 				/* This results in a full GPU reset, but all we need to do is soft
7989 				 * reset the CP for gfx
7990 				 */
7991 				queue_reset = true;
7992 				break;
7993 			case 1:
7994 				/* XXX compute */
7995 				queue_reset = true;
7996 				break;
7997 			case 2:
7998 				/* XXX compute */
7999 				queue_reset = true;
8000 				break;
8001 			}
8002 			break;
8003 		case 185: /* CP Privileged inst */
8004 			DRM_ERROR("Illegal instruction in command stream\n");
8005 			/* XXX check the bitfield order! */
8006 			me_id = (ring_id & 0x60) >> 5;
8007 			pipe_id = (ring_id & 0x18) >> 3;
8008 			queue_id = (ring_id & 0x7) >> 0;
8009 			switch (me_id) {
8010 			case 0:
8011 				/* This results in a full GPU reset, but all we need to do is soft
8012 				 * reset the CP for gfx
8013 				 */
8014 				queue_reset = true;
8015 				break;
8016 			case 1:
8017 				/* XXX compute */
8018 				queue_reset = true;
8019 				break;
8020 			case 2:
8021 				/* XXX compute */
8022 				queue_reset = true;
8023 				break;
8024 			}
8025 			break;
8026 		case 224: /* SDMA trap event */
8027 			/* XXX check the bitfield order! */
8028 			me_id = (ring_id & 0x3) >> 0;
8029 			queue_id = (ring_id & 0xc) >> 2;
8030 			DRM_DEBUG("IH: SDMA trap\n");
8031 			switch (me_id) {
8032 			case 0:
8033 				switch (queue_id) {
8034 				case 0:
8035 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8036 					break;
8037 				case 1:
8038 					/* XXX compute */
8039 					break;
8040 				case 2:
8041 					/* XXX compute */
8042 					break;
8043 				}
8044 				break;
8045 			case 1:
8046 				switch (queue_id) {
8047 				case 0:
8048 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8049 					break;
8050 				case 1:
8051 					/* XXX compute */
8052 					break;
8053 				case 2:
8054 					/* XXX compute */
8055 					break;
8056 				}
8057 				break;
8058 			}
8059 			break;
8060 		case 230: /* thermal low to high */
8061 			DRM_DEBUG("IH: thermal low to high\n");
8062 			rdev->pm.dpm.thermal.high_to_low = false;
8063 			queue_thermal = true;
8064 			break;
8065 		case 231: /* thermal high to low */
8066 			DRM_DEBUG("IH: thermal high to low\n");
8067 			rdev->pm.dpm.thermal.high_to_low = true;
8068 			queue_thermal = true;
8069 			break;
8070 		case 233: /* GUI IDLE */
8071 			DRM_DEBUG("IH: GUI idle\n");
8072 			break;
8073 		case 241: /* SDMA Privileged inst */
8074 		case 247: /* SDMA Privileged inst */
8075 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8076 			/* XXX check the bitfield order! */
8077 			me_id = (ring_id & 0x3) >> 0;
8078 			queue_id = (ring_id & 0xc) >> 2;
8079 			switch (me_id) {
8080 			case 0:
8081 				switch (queue_id) {
8082 				case 0:
8083 					queue_reset = true;
8084 					break;
8085 				case 1:
8086 					/* XXX compute */
8087 					queue_reset = true;
8088 					break;
8089 				case 2:
8090 					/* XXX compute */
8091 					queue_reset = true;
8092 					break;
8093 				}
8094 				break;
8095 			case 1:
8096 				switch (queue_id) {
8097 				case 0:
8098 					queue_reset = true;
8099 					break;
8100 				case 1:
8101 					/* XXX compute */
8102 					queue_reset = true;
8103 					break;
8104 				case 2:
8105 					/* XXX compute */
8106 					queue_reset = true;
8107 					break;
8108 				}
8109 				break;
8110 			}
8111 			break;
8112 		default:
8113 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8114 			break;
8115 		}
8116 
8117 		/* wptr/rptr are in bytes! */
8118 		rptr += 16;
8119 		rptr &= rdev->ih.ptr_mask;
8120 		WREG32(IH_RB_RPTR, rptr);
8121 	}
8122 	if (queue_dp)
8123 		schedule_work(&rdev->dp_work);
8124 	if (queue_hotplug)
8125 		schedule_delayed_work(&rdev->hotplug_work, 0);
8126 	if (queue_reset) {
8127 		rdev->needs_reset = true;
8128 		wake_up_all(&rdev->fence_queue);
8129 	}
8130 	if (queue_thermal)
8131 		schedule_work(&rdev->pm.dpm.thermal.work);
8132 	rdev->ih.rptr = rptr;
8133 	atomic_set(&rdev->ih.lock, 0);
8134 
8135 	/* make sure wptr hasn't changed while processing */
8136 	wptr = cik_get_ih_wptr(rdev);
8137 	if (wptr != rptr)
8138 		goto restart_ih;
8139 
8140 	return IRQ_HANDLED;
8141 }
8142 
8143 /*
8144  * startup/shutdown callbacks
8145  */
8146 static void cik_uvd_init(struct radeon_device *rdev)
8147 {
8148 	int r;
8149 
8150 	if (!rdev->has_uvd)
8151 		return;
8152 
8153 	r = radeon_uvd_init(rdev);
8154 	if (r) {
8155 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8156 		/*
8157 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
8158 		 * cik_uvd_start() fail early, so nothing happens there.
8159 		 * It is therefore pointless to go through that code,
8160 		 * hence we disable UVD here.
8161 		 */
8162 		rdev->has_uvd = 0;
8163 		return;
8164 	}
8165 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8166 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8167 }
8168 
8169 static void cik_uvd_start(struct radeon_device *rdev)
8170 {
8171 	int r;
8172 
8173 	if (!rdev->has_uvd)
8174 		return;
8175 
8176 	r = radeon_uvd_resume(rdev);
8177 	if (r) {
8178 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8179 		goto error;
8180 	}
8181 	r = uvd_v4_2_resume(rdev);
8182 	if (r) {
8183 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8184 		goto error;
8185 	}
8186 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8187 	if (r) {
8188 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8189 		goto error;
8190 	}
8191 	return;
8192 
8193 error:
8194 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8195 }
8196 
8197 static void cik_uvd_resume(struct radeon_device *rdev)
8198 {
8199 	struct radeon_ring *ring;
8200 	int r;
8201 
8202 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8203 		return;
8204 
8205 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8206 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8207 	if (r) {
8208 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8209 		return;
8210 	}
8211 	r = uvd_v1_0_init(rdev);
8212 	if (r) {
8213 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8214 		return;
8215 	}
8216 }
8217 
8218 static void cik_vce_init(struct radeon_device *rdev)
8219 {
8220 	int r;
8221 
8222 	if (!rdev->has_vce)
8223 		return;
8224 
8225 	r = radeon_vce_init(rdev);
8226 	if (r) {
8227 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8228 		/*
8229 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
8230 		 * cik_vce_start() fail early, so nothing happens there.
8231 		 * It is therefore pointless to go through that code,
8232 		 * hence we disable VCE here.
8233 		 */
8234 		rdev->has_vce = 0;
8235 		return;
8236 	}
8237 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8238 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8239 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8240 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8241 }
8242 
8243 static void cik_vce_start(struct radeon_device *rdev)
8244 {
8245 	int r;
8246 
8247 	if (!rdev->has_vce)
8248 		return;
8249 
8250 	r = radeon_vce_resume(rdev);
8251 	if (r) {
8252 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8253 		goto error;
8254 	}
8255 	r = vce_v2_0_resume(rdev);
8256 	if (r) {
8257 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8258 		goto error;
8259 	}
8260 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8261 	if (r) {
8262 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8263 		goto error;
8264 	}
8265 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8266 	if (r) {
8267 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8268 		goto error;
8269 	}
8270 	return;
8271 
8272 error:
8273 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8274 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8275 }
8276 
8277 static void cik_vce_resume(struct radeon_device *rdev)
8278 {
8279 	struct radeon_ring *ring;
8280 	int r;
8281 
8282 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8283 		return;
8284 
8285 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8286 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8287 	if (r) {
8288 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8289 		return;
8290 	}
8291 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8292 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8293 	if (r) {
8294 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
8295 		return;
8296 	}
8297 	r = vce_v1_0_init(rdev);
8298 	if (r) {
8299 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8300 		return;
8301 	}
8302 }
8303 
8304 /**
8305  * cik_startup - program the asic to a functional state
8306  *
8307  * @rdev: radeon_device pointer
8308  *
8309  * Programs the asic to a functional state (CIK).
8310  * Called by cik_init() and cik_resume().
8311  * Returns 0 for success, error for failure.
8312  */
8313 static int cik_startup(struct radeon_device *rdev)
8314 {
8315 	struct radeon_ring *ring;
8316 	u32 nop;
8317 	int r;
8318 
8319 	/* enable pcie gen2/3 link */
8320 	cik_pcie_gen3_enable(rdev);
8321 	/* enable aspm */
8322 	cik_program_aspm(rdev);
8323 
8324 	/* scratch needs to be initialized before MC */
8325 	r = r600_vram_scratch_init(rdev);
8326 	if (r)
8327 		return r;
8328 
8329 	cik_mc_program(rdev);
8330 
8331 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8332 		r = ci_mc_load_microcode(rdev);
8333 		if (r) {
8334 			DRM_ERROR("Failed to load MC firmware!\n");
8335 			return r;
8336 		}
8337 	}
8338 
8339 	r = cik_pcie_gart_enable(rdev);
8340 	if (r)
8341 		return r;
8342 	cik_gpu_init(rdev);
8343 
8344 	/* allocate rlc buffers */
8345 	if (rdev->flags & RADEON_IS_IGP) {
8346 		if (rdev->family == CHIP_KAVERI) {
8347 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8348 			rdev->rlc.reg_list_size =
8349 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8350 		} else {
8351 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8352 			rdev->rlc.reg_list_size =
8353 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8354 		}
8355 	}
8356 	rdev->rlc.cs_data = ci_cs_data;
8357 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8358 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8359 	r = sumo_rlc_init(rdev);
8360 	if (r) {
8361 		DRM_ERROR("Failed to init rlc BOs!\n");
8362 		return r;
8363 	}
8364 
8365 	/* allocate wb buffer */
8366 	r = radeon_wb_init(rdev);
8367 	if (r)
8368 		return r;
8369 
8370 	/* allocate mec buffers */
8371 	r = cik_mec_init(rdev);
8372 	if (r) {
8373 		DRM_ERROR("Failed to init MEC BOs!\n");
8374 		return r;
8375 	}
8376 
8377 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8378 	if (r) {
8379 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8380 		return r;
8381 	}
8382 
8383 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8384 	if (r) {
8385 		dev_err(rdev->dev, "failed initializing CP1 fences (%d).\n", r);
8386 		return r;
8387 	}
8388 
8389 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8390 	if (r) {
8391 		dev_err(rdev->dev, "failed initializing CP2 fences (%d).\n", r);
8392 		return r;
8393 	}
8394 
8395 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8396 	if (r) {
8397 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8398 		return r;
8399 	}
8400 
8401 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8402 	if (r) {
8403 		dev_err(rdev->dev, "failed initializing DMA1 fences (%d).\n", r);
8404 		return r;
8405 	}
8406 
8407 	cik_uvd_start(rdev);
8408 	cik_vce_start(rdev);
8409 
8410 	/* Enable IRQ */
8411 	if (!rdev->irq.installed) {
8412 		r = radeon_irq_kms_init(rdev);
8413 		if (r)
8414 			return r;
8415 	}
8416 
8417 	r = cik_irq_init(rdev);
8418 	if (r) {
8419 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8420 		radeon_irq_kms_fini(rdev);
8421 		return r;
8422 	}
8423 	cik_irq_set(rdev);
8424 
8425 	if (rdev->family == CHIP_HAWAII) {
8426 		if (rdev->new_fw)
8427 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8428 		else
8429 			nop = RADEON_CP_PACKET2;
8430 	} else {
8431 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8432 	}
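	/*
	 * Presumably the original Hawaii ucode predates support for the
	 * multi-dword type-3 NOP, so it keeps the legacy type-2
	 * (CP_PACKET2) filler; newer firmware (rdev->new_fw) pads with
	 * PACKET3_NOP like the other CIK parts.
	 */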
8433 
8434 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8435 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8436 			     nop);
8437 	if (r)
8438 		return r;
8439 
8440 	/* set up the compute queues */
8441 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8442 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8443 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8444 			     nop);
8445 	if (r)
8446 		return r;
8447 	ring->me = 1; /* first MEC */
8448 	ring->pipe = 0; /* first pipe */
8449 	ring->queue = 0; /* first queue */
8450 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8451 
8452 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8453 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8454 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8455 			     nop);
8456 	if (r)
8457 		return r;
8458 	/* dGPUs only have 1 MEC */
8459 	ring->me = 1; /* first MEC */
8460 	ring->pipe = 0; /* first pipe */
8461 	ring->queue = 1; /* second queue */
8462 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8463 
8464 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8465 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8466 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8467 	if (r)
8468 		return r;
8469 
8470 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8471 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8472 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8473 	if (r)
8474 		return r;
8475 
8476 	r = cik_cp_resume(rdev);
8477 	if (r)
8478 		return r;
8479 
8480 	r = cik_sdma_resume(rdev);
8481 	if (r)
8482 		return r;
8483 
8484 	cik_uvd_resume(rdev);
8485 	cik_vce_resume(rdev);
8486 
8487 	r = radeon_ib_pool_init(rdev);
8488 	if (r) {
8489 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8490 		return r;
8491 	}
8492 
8493 	r = radeon_vm_manager_init(rdev);
8494 	if (r) {
8495 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8496 		return r;
8497 	}
8498 
8499 	r = radeon_audio_init(rdev);
8500 	if (r)
8501 		return r;
8502 
8503 	r = radeon_kfd_resume(rdev);
8504 	if (r)
8505 		return r;
8506 
8507 	return 0;
8508 }
8509 
8510 /**
8511  * cik_resume - resume the asic to a functional state
8512  *
8513  * @rdev: radeon_device pointer
8514  *
8515  * Programs the asic to a functional state (CIK).
8516  * Called at resume.
8517  * Returns 0 for success, error for failure.
8518  */
8519 int cik_resume(struct radeon_device *rdev)
8520 {
8521 	int r;
8522 
8523 	/* post card */
8524 	atom_asic_init(rdev->mode_info.atom_context);
8525 
8526 	/* init golden registers */
8527 	cik_init_golden_registers(rdev);
8528 
8529 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8530 		radeon_pm_resume(rdev);
8531 
8532 	rdev->accel_working = true;
8533 	r = cik_startup(rdev);
8534 	if (r) {
8535 		DRM_ERROR("cik startup failed on resume\n");
8536 		rdev->accel_working = false;
8537 		return r;
8538 	}
8539 
8540 	return r;
8542 }
8543 
8544 /**
8545  * cik_suspend - suspend the asic
8546  *
8547  * @rdev: radeon_device pointer
8548  *
8549  * Bring the chip into a state suitable for suspend (CIK).
8550  * Called at suspend.
8551  * Returns 0 for success.
8552  */
8553 int cik_suspend(struct radeon_device *rdev)
8554 {
8555 	radeon_kfd_suspend(rdev);
8556 	radeon_pm_suspend(rdev);
8557 	radeon_audio_fini(rdev);
8558 	radeon_vm_manager_fini(rdev);
8559 	cik_cp_enable(rdev, false);
8560 	cik_sdma_enable(rdev, false);
8561 	if (rdev->has_uvd) {
8562 		uvd_v1_0_fini(rdev);
8563 		radeon_uvd_suspend(rdev);
8564 	}
8565 	if (rdev->has_vce)
8566 		radeon_vce_suspend(rdev);
8567 	cik_fini_pg(rdev);
8568 	cik_fini_cg(rdev);
8569 	cik_irq_suspend(rdev);
8570 	radeon_wb_disable(rdev);
8571 	cik_pcie_gart_disable(rdev);
8572 	return 0;
8573 }
8574 
8575 /* The plan is to move initialization into this function and use
8576  * helper functions so that radeon_device_init does pretty much
8577  * nothing more than call asic-specific functions. This should
8578  * also allow us to remove a bunch of callback functions,
8579  * like vram_info.
8580  */
8581 /**
8582  * cik_init - asic specific driver and hw init
8583  *
8584  * @rdev: radeon_device pointer
8585  *
8586  * Setup asic specific driver variables and program the hw
8587  * to a functional state (CIK).
8588  * Called at driver startup.
8589  * Returns 0 for success, errors for failure.
8590  */
8591 int cik_init(struct radeon_device *rdev)
8592 {
8593 	struct radeon_ring *ring;
8594 	int r;
8595 
8596 	/* Read BIOS */
8597 	if (!radeon_get_bios(rdev)) {
8598 		if (ASIC_IS_AVIVO(rdev))
8599 			return -EINVAL;
8600 	}
8601 	/* Must be an ATOMBIOS */
8602 	if (!rdev->is_atom_bios) {
8603 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8604 		return -EINVAL;
8605 	}
8606 	r = radeon_atombios_init(rdev);
8607 	if (r)
8608 		return r;
8609 
8610 	/* Post card if necessary */
8611 	if (!radeon_card_posted(rdev)) {
8612 		if (!rdev->bios) {
8613 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8614 			return -EINVAL;
8615 		}
8616 		DRM_INFO("GPU not posted. posting now...\n");
8617 		atom_asic_init(rdev->mode_info.atom_context);
8618 	}
8619 	/* init golden registers */
8620 	cik_init_golden_registers(rdev);
8621 	/* Initialize scratch registers */
8622 	cik_scratch_init(rdev);
8623 	/* Initialize surface registers */
8624 	radeon_surface_init(rdev);
8625 	/* Initialize clocks */
8626 	radeon_get_clock_info(rdev->ddev);
8627 
8628 	/* Fence driver */
8629 	r = radeon_fence_driver_init(rdev);
8630 	if (r)
8631 		return r;
8632 
8633 	/* initialize memory controller */
8634 	r = cik_mc_init(rdev);
8635 	if (r)
8636 		return r;
8637 	/* Memory manager */
8638 	r = radeon_bo_init(rdev);
8639 	if (r)
8640 		return r;
8641 
8642 	if (rdev->flags & RADEON_IS_IGP) {
8643 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8644 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8645 			r = cik_init_microcode(rdev);
8646 			if (r) {
8647 				DRM_ERROR("Failed to load firmware!\n");
8648 				return r;
8649 			}
8650 		}
8651 	} else {
8652 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8653 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8654 		    !rdev->mc_fw) {
8655 			r = cik_init_microcode(rdev);
8656 			if (r) {
8657 				DRM_ERROR("Failed to load firmware!\n");
8658 				return r;
8659 			}
8660 		}
8661 	}
8662 
8663 	/* Initialize power management */
8664 	radeon_pm_init(rdev);
8665 
8666 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8667 	ring->ring_obj = NULL;
8668 	r600_ring_init(rdev, ring, 1024 * 1024);
8669 
8670 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8671 	ring->ring_obj = NULL;
8672 	r600_ring_init(rdev, ring, 1024 * 1024);
8673 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8674 	if (r)
8675 		return r;
8676 
8677 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8678 	ring->ring_obj = NULL;
8679 	r600_ring_init(rdev, ring, 1024 * 1024);
8680 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8681 	if (r)
8682 		return r;
8683 
8684 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8685 	ring->ring_obj = NULL;
8686 	r600_ring_init(rdev, ring, 256 * 1024);
8687 
8688 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8689 	ring->ring_obj = NULL;
8690 	r600_ring_init(rdev, ring, 256 * 1024);
8691 
8692 	cik_uvd_init(rdev);
8693 	cik_vce_init(rdev);
8694 
8695 	rdev->ih.ring_obj = NULL;
8696 	r600_ih_ring_init(rdev, 64 * 1024);
8697 
8698 	r = r600_pcie_gart_init(rdev);
8699 	if (r)
8700 		return r;
8701 
8702 	rdev->accel_working = true;
8703 	r = cik_startup(rdev);
8704 	if (r) {
8705 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8706 		cik_cp_fini(rdev);
8707 		cik_sdma_fini(rdev);
8708 		cik_irq_fini(rdev);
8709 		sumo_rlc_fini(rdev);
8710 		cik_mec_fini(rdev);
8711 		radeon_wb_fini(rdev);
8712 		radeon_ib_pool_fini(rdev);
8713 		radeon_vm_manager_fini(rdev);
8714 		radeon_irq_kms_fini(rdev);
8715 		cik_pcie_gart_fini(rdev);
8716 		rdev->accel_working = false;
8717 	}
8718 
8719 	/* Don't start up if the MC ucode is missing.
8720 	 * The default clocks and voltages before the MC ucode
8721 	 * is loaded are not sufficient for advanced operations.
8722 	 */
8723 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8724 		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8725 		return -EINVAL;
8726 	}
8727 
8728 	return 0;
8729 }
8730 
8731 /**
8732  * cik_fini - asic specific driver and hw fini
8733  *
8734  * @rdev: radeon_device pointer
8735  *
8736  * Tear down the asic specific driver variables and program the hw
8737  * to an idle state (CIK).
8738  * Called at driver unload.
8739  */
8740 void cik_fini(struct radeon_device *rdev)
8741 {
8742 	radeon_pm_fini(rdev);
8743 	cik_cp_fini(rdev);
8744 	cik_sdma_fini(rdev);
8745 	cik_fini_pg(rdev);
8746 	cik_fini_cg(rdev);
8747 	cik_irq_fini(rdev);
8748 	sumo_rlc_fini(rdev);
8749 	cik_mec_fini(rdev);
8750 	radeon_wb_fini(rdev);
8751 	radeon_vm_manager_fini(rdev);
8752 	radeon_ib_pool_fini(rdev);
8753 	radeon_irq_kms_fini(rdev);
8754 	uvd_v1_0_fini(rdev);
8755 	radeon_uvd_fini(rdev);
8756 	radeon_vce_fini(rdev);
8757 	cik_pcie_gart_fini(rdev);
8758 	r600_vram_scratch_fini(rdev);
8759 	radeon_gem_fini(rdev);
8760 	radeon_fence_driver_fini(rdev);
8761 	radeon_bo_fini(rdev);
8762 	radeon_atombios_fini(rdev);
8763 	kfree(rdev->bios);
8764 	rdev->bios = NULL;
8765 }
8766 
8767 void dce8_program_fmt(struct drm_encoder *encoder)
8768 {
8769 	struct drm_device *dev = encoder->dev;
8770 	struct radeon_device *rdev = dev->dev_private;
8771 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8772 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8773 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8774 	int bpc = 0;
8775 	u32 tmp = 0;
8776 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8777 
8778 	if (connector) {
8779 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8780 		bpc = radeon_get_monitor_bpc(connector);
8781 		dither = radeon_connector->dither;
8782 	}
8783 
8784 	/* LVDS/eDP FMT is set up by atom */
8785 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8786 		return;
8787 
8788 	/* not needed for analog */
8789 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8790 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8791 		return;
8792 
8793 	if (bpc == 0)
8794 		return;
8795 
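	/* FMT_*_DEPTH() encodes the target depth: 0 = 6 bpc, 1 = 8 bpc, 2 = 10 bpc */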
8796 	switch (bpc) {
8797 	case 6:
8798 		if (dither == RADEON_FMT_DITHER_ENABLE)
8799 			/* XXX sort out optimal dither settings */
8800 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8801 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8802 		else
8803 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8804 		break;
8805 	case 8:
8806 		if (dither == RADEON_FMT_DITHER_ENABLE)
8807 			/* XXX sort out optimal dither settings */
8808 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8809 				FMT_RGB_RANDOM_ENABLE |
8810 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8811 		else
8812 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8813 		break;
8814 	case 10:
8815 		if (dither == RADEON_FMT_DITHER_ENABLE)
8816 			/* XXX sort out optimal dither settings */
8817 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8818 				FMT_RGB_RANDOM_ENABLE |
8819 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8820 		else
8821 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8822 		break;
8823 	default:
8824 		/* not needed */
8825 		break;
8826 	}
8827 
8828 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8829 }
8830 
8831 /* display watermark setup */
8832 /**
8833  * dce8_line_buffer_adjust - Set up the line buffer
8834  *
8835  * @rdev: radeon_device pointer
8836  * @radeon_crtc: the selected display controller
8837  * @mode: the current display mode on the selected display
8838  * controller
8839  *
8840  * Set up the line buffer allocation for
8841  * the selected display controller (CIK).
8842  * Returns the line buffer size in pixels.
8843  */
8844 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8845 				   struct radeon_crtc *radeon_crtc,
8846 				   struct drm_display_mode *mode)
8847 {
8848 	u32 tmp, buffer_alloc, i;
8849 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8850 	/*
8851 	 * Line Buffer Setup
8852 	 * There are 6 line buffers, one for each display controller.
8853 	 * There are 3 partitions per LB. Select the number of partitions
8854 	 * to enable based on the display width.  For display widths larger
8855 	 * than 4096, you need to use 2 display controllers and combine
8856 	 * them using the stereo blender.
8857 	 */
8858 	if (radeon_crtc->base.enabled && mode) {
8859 		if (mode->crtc_hdisplay < 1920) {
8860 			tmp = 1;
8861 			buffer_alloc = 2;
8862 		} else if (mode->crtc_hdisplay < 2560) {
8863 			tmp = 2;
8864 			buffer_alloc = 2;
8865 		} else if (mode->crtc_hdisplay < 4096) {
8866 			tmp = 0;
8867 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8868 		} else {
8869 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8870 			tmp = 0;
8871 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8872 		}
8873 	} else {
8874 		tmp = 1;
8875 		buffer_alloc = 0;
8876 	}
8877 
8878 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8879 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8880 
8881 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8882 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8883 	for (i = 0; i < rdev->usec_timeout; i++) {
8884 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8885 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8886 			break;
8887 		udelay(1);
8888 	}
8889 
8890 	if (radeon_crtc->base.enabled && mode) {
8891 		switch (tmp) {
8892 		case 0:
8893 		default:
8894 			return 4096 * 2;
8895 		case 1:
8896 			return 1920 * 2;
8897 		case 2:
8898 			return 2560 * 2;
8899 		}
8900 	}
8901 
8902 	/* controller not enabled, so no lb used */
8903 	return 0;
8904 }
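/*
 * Worked example (illustrative): a 2560x1440 mode falls into the
 * crtc_hdisplay < 4096 bucket, so the LB is configured as a single
 * 4096-pixel partition (tmp = 0) with 4 DMIF buffers on a dGPU, and
 * the function reports 4096 * 2 = 8192 pixels of line buffer.
 */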
8905 
8906 /**
8907  * cik_get_number_of_dram_channels - get the number of dram channels
8908  *
8909  * @rdev: radeon_device pointer
8910  *
8911  * Look up the number of video ram channels (CIK).
8912  * Used for display watermark bandwidth calculations
8913  * Returns the number of dram channels
8914  */
8915 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8916 {
8917 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8918 
8919 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8920 	case 0:
8921 	default:
8922 		return 1;
8923 	case 1:
8924 		return 2;
8925 	case 2:
8926 		return 4;
8927 	case 3:
8928 		return 8;
8929 	case 4:
8930 		return 3;
8931 	case 5:
8932 		return 6;
8933 	case 6:
8934 		return 10;
8935 	case 7:
8936 		return 12;
8937 	case 8:
8938 		return 16;
8939 	}
8940 }
8941 
8942 struct dce8_wm_params {
8943 	u32 dram_channels; /* number of dram channels */
8944 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8945 	u32 sclk;          /* engine clock in kHz */
8946 	u32 disp_clk;      /* display clock in kHz */
8947 	u32 src_width;     /* viewport width */
8948 	u32 active_time;   /* active scanline time in ns */
8949 	u32 blank_time;    /* scanline blank time in ns */
8950 	bool interlaced;    /* mode is interlaced */
8951 	fixed20_12 vsc;    /* vertical scale ratio */
8952 	u32 num_heads;     /* number of active crtcs */
8953 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8954 	u32 lb_size;       /* line buffer allocated to pipe */
8955 	u32 vtaps;         /* vertical scaler taps */
8956 };
8957 
8958 /**
8959  * dce8_dram_bandwidth - get the dram bandwidth
8960  *
8961  * @wm: watermark calculation data
8962  *
8963  * Calculate the raw dram bandwidth (CIK).
8964  * Used for display watermark bandwidth calculations
8965  * Returns the dram bandwidth in MBytes/s
8966  */
8967 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8968 {
8969 	/* Calculate raw DRAM Bandwidth */
8970 	fixed20_12 dram_efficiency; /* 0.7 */
8971 	fixed20_12 yclk, dram_channels, bandwidth;
8972 	fixed20_12 a;
8973 
8974 	a.full = dfixed_const(1000);
8975 	yclk.full = dfixed_const(wm->yclk);
8976 	yclk.full = dfixed_div(yclk, a);
8977 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8978 	a.full = dfixed_const(10);
8979 	dram_efficiency.full = dfixed_const(7);
8980 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8981 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8982 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8983 
8984 	return dfixed_trunc(bandwidth);
8985 }
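/*
 * Worked example (illustrative numbers): wm->yclk = 1000000 (1 GHz per
 * pin) with 4 dram channels yields (1000000 / 1000) * (4 * 4) * 0.7 =
 * 11200 MBytes/s of raw dram bandwidth.
 */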
8986 
8987 /**
8988  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8989  *
8990  * @wm: watermark calculation data
8991  *
8992  * Calculate the dram bandwidth used for display (CIK).
8993  * Used for display watermark bandwidth calculations
8994  * Returns the dram bandwidth for display in MBytes/s
8995  */
8996 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8997 {
8998 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8999 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9000 	fixed20_12 yclk, dram_channels, bandwidth;
9001 	fixed20_12 a;
9002 
9003 	a.full = dfixed_const(1000);
9004 	yclk.full = dfixed_const(wm->yclk);
9005 	yclk.full = dfixed_div(yclk, a);
9006 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9007 	a.full = dfixed_const(10);
9008 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9009 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9010 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9011 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9012 
9013 	return dfixed_trunc(bandwidth);
9014 }
9015 
9016 /**
9017  * dce8_data_return_bandwidth - get the data return bandwidth
9018  *
9019  * @wm: watermark calculation data
9020  *
9021  * Calculate the data return bandwidth used for display (CIK).
9022  * Used for display watermark bandwidth calculations
9023  * Returns the data return bandwidth in MBytes/s
9024  */
9025 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9026 {
9027 	/* Calculate the display Data return Bandwidth */
9028 	fixed20_12 return_efficiency; /* 0.8 */
9029 	fixed20_12 sclk, bandwidth;
9030 	fixed20_12 a;
9031 
9032 	a.full = dfixed_const(1000);
9033 	sclk.full = dfixed_const(wm->sclk);
9034 	sclk.full = dfixed_div(sclk, a);
9035 	a.full = dfixed_const(10);
9036 	return_efficiency.full = dfixed_const(8);
9037 	return_efficiency.full = dfixed_div(return_efficiency, a);
9038 	a.full = dfixed_const(32);
9039 	bandwidth.full = dfixed_mul(a, sclk);
9040 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9041 
9042 	return dfixed_trunc(bandwidth);
9043 }
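/*
 * Worked example (illustrative): wm->sclk = 800000 (an 800 MHz engine
 * clock) yields (800000 / 1000) * 32 * 0.8 = 20480 MBytes/s.
 */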
9044 
9045 /**
9046  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9047  *
9048  * @wm: watermark calculation data
9049  *
9050  * Calculate the dmif bandwidth used for display (CIK).
9051  * Used for display watermark bandwidth calculations
9052  * Returns the dmif bandwidth in MBytes/s
9053  */
9054 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9055 {
9056 	/* Calculate the DMIF Request Bandwidth */
9057 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9058 	fixed20_12 disp_clk, bandwidth;
9059 	fixed20_12 a, b;
9060 
9061 	a.full = dfixed_const(1000);
9062 	disp_clk.full = dfixed_const(wm->disp_clk);
9063 	disp_clk.full = dfixed_div(disp_clk, a);
9064 	a.full = dfixed_const(32);
9065 	b.full = dfixed_mul(a, disp_clk);
9066 
9067 	a.full = dfixed_const(10);
9068 	disp_clk_request_efficiency.full = dfixed_const(8);
9069 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9070 
9071 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9072 
9073 	return dfixed_trunc(bandwidth);
9074 }
9075 
9076 /**
9077  * dce8_available_bandwidth - get the min available bandwidth
9078  *
9079  * @wm: watermark calculation data
9080  *
9081  * Calculate the min available bandwidth used for display (CIK).
9082  * Used for display watermark bandwidth calculations
9083  * Returns the min available bandwidth in MBytes/s
9084  */
9085 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9086 {
9087 	/* Calculate the Available bandwidth. Display can use this temporarily, but not on average. */
9088 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9089 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9090 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9091 
9092 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9093 }
9094 
9095 /**
9096  * dce8_average_bandwidth - get the average available bandwidth
9097  *
9098  * @wm: watermark calculation data
9099  *
9100  * Calculate the average available bandwidth used for display (CIK).
9101  * Used for display watermark bandwidth calculations
9102  * Returns the average available bandwidth in MBytes/s
9103  */
9104 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9105 {
9106 	/* Calculate the display mode Average Bandwidth
9107 	 * DisplayMode should contain the source and destination dimensions,
9108 	 * timing, etc.
9109 	 */
9110 	fixed20_12 bpp;
9111 	fixed20_12 line_time;
9112 	fixed20_12 src_width;
9113 	fixed20_12 bandwidth;
9114 	fixed20_12 a;
9115 
9116 	a.full = dfixed_const(1000);
9117 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9118 	line_time.full = dfixed_div(line_time, a);
9119 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9120 	src_width.full = dfixed_const(wm->src_width);
9121 	bandwidth.full = dfixed_mul(src_width, bpp);
9122 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9123 	bandwidth.full = dfixed_div(bandwidth, line_time);
9124 
9125 	return dfixed_trunc(bandwidth);
9126 }
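/*
 * Worked example (illustrative, 1080p-like timing): src_width = 1920,
 * 4 bytes per pixel, vsc = 1 and a ~13200 ns line time give roughly
 * 1920 * 4 / 13.2 ~= 580 MBytes/s of average display demand.
 */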
9127 
9128 /**
9129  * dce8_latency_watermark - get the latency watermark
9130  *
9131  * @wm: watermark calculation data
9132  *
9133  * Calculate the latency watermark (CIK).
9134  * Used for display watermark bandwidth calculations
9135  * Returns the latency watermark in ns
9136  */
9137 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9138 {
9139 	/* First calculate the latency in ns */
9140 	u32 mc_latency = 2000; /* 2000 ns. */
9141 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9142 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9143 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9144 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9145 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9146 		(wm->num_heads * cursor_line_pair_return_time);
9147 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9148 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9149 	u32 tmp, dmif_size = 12288;
9150 	fixed20_12 a, b, c;
9151 
9152 	if (wm->num_heads == 0)
9153 		return 0;
9154 
9155 	a.full = dfixed_const(2);
9156 	b.full = dfixed_const(1);
9157 	if ((wm->vsc.full > a.full) ||
9158 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9159 	    (wm->vtaps >= 5) ||
9160 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9161 		max_src_lines_per_dst_line = 4;
9162 	else
9163 		max_src_lines_per_dst_line = 2;
9164 
9165 	a.full = dfixed_const(available_bandwidth);
9166 	b.full = dfixed_const(wm->num_heads);
9167 	a.full = dfixed_div(a, b);
9168 
9169 	b.full = dfixed_const(mc_latency + 512);
9170 	c.full = dfixed_const(wm->disp_clk);
9171 	b.full = dfixed_div(b, c);
9172 
9173 	c.full = dfixed_const(dmif_size);
9174 	b.full = dfixed_div(c, b);
9175 
9176 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9177 
9178 	b.full = dfixed_const(1000);
9179 	c.full = dfixed_const(wm->disp_clk);
9180 	b.full = dfixed_div(c, b);
9181 	c.full = dfixed_const(wm->bytes_per_pixel);
9182 	b.full = dfixed_mul(b, c);
9183 
9184 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9185 
9186 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9187 	b.full = dfixed_const(1000);
9188 	c.full = dfixed_const(lb_fill_bw);
9189 	b.full = dfixed_div(c, b);
9190 	a.full = dfixed_div(a, b);
9191 	line_fill_time = dfixed_trunc(a);
9192 
9193 	if (line_fill_time < wm->active_time)
9194 		return latency;
9195 	else
9196 		return latency + (line_fill_time - wm->active_time);
9198 }
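/*
 * A rough reading of the math above: the watermark is the mc latency
 * plus the dc pipe latency plus the time the other heads spend
 * returning their chunks; if the line buffer cannot be refilled within
 * one active line (line_fill_time > active_time), the watermark grows
 * by the shortfall.
 */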
9199 
9200 /**
9201  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9202  * average bandwidth against the dram bandwidth for display
9203  *
9204  * @wm: watermark calculation data
9205  *
9206  * Check if the display average bandwidth fits in the display
9207  * dram bandwidth (CIK).
9208  * Used for display watermark bandwidth calculations
9209  * Returns true if the display fits, false if not.
9210  */
9211 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9212 {
9213 	if (dce8_average_bandwidth(wm) <=
9214 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9215 		return true;
9216 	else
9217 		return false;
9218 }
9219 
9220 /**
9221  * dce8_average_bandwidth_vs_available_bandwidth - check
9222  * average and available bandwidth
9223  *
9224  * @wm: watermark calculation data
9225  *
9226  * Check if the display average bandwidth fits in the display
9227  * available bandwidth (CIK).
9228  * Used for display watermark bandwidth calculations
9229  * Returns true if the display fits, false if not.
9230  */
9231 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9232 {
9233 	if (dce8_average_bandwidth(wm) <=
9234 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9235 		return true;
9236 	else
9237 		return false;
9238 }
9239 
9240 /**
9241  * dce8_check_latency_hiding - check latency hiding
9242  *
9243  * @wm: watermark calculation data
9244  *
9245  * Check latency hiding (CIK).
9246  * Used for display watermark bandwidth calculations
9247  * Returns true if the display fits, false if not.
9248  */
9249 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9250 {
9251 	u32 lb_partitions = wm->lb_size / wm->src_width;
9252 	u32 line_time = wm->active_time + wm->blank_time;
9253 	u32 latency_tolerant_lines;
9254 	u32 latency_hiding;
9255 	fixed20_12 a;
9256 
9257 	a.full = dfixed_const(1);
9258 	if (wm->vsc.full > a.full)
9259 		latency_tolerant_lines = 1;
9260 	else {
9261 		if (lb_partitions <= (wm->vtaps + 1))
9262 			latency_tolerant_lines = 1;
9263 		else
9264 			latency_tolerant_lines = 2;
9265 	}
9266 
9267 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9268 
9269 	if (dce8_latency_watermark(wm) <= latency_hiding)
9270 		return true;
9271 	else
9272 		return false;
9273 }
9274 
9275 /**
9276  * dce8_program_watermarks - program display watermarks
9277  *
9278  * @rdev: radeon_device pointer
9279  * @radeon_crtc: the selected display controller
9280  * @lb_size: line buffer size
9281  * @num_heads: number of display controllers in use
9282  *
9283  * Calculate and program the display watermarks for the
9284  * selected display controller (CIK).
9285  */
9286 static void dce8_program_watermarks(struct radeon_device *rdev,
9287 				    struct radeon_crtc *radeon_crtc,
9288 				    u32 lb_size, u32 num_heads)
9289 {
9290 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9291 	struct dce8_wm_params wm_low, wm_high;
9292 	u32 pixel_period;
9293 	u32 line_time = 0;
9294 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9295 	u32 tmp, wm_mask;
9296 
9297 	if (radeon_crtc->base.enabled && num_heads && mode) {
9298 		pixel_period = 1000000 / (u32)mode->clock;
9299 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9300 
9301 		/* watermark for high clocks */
9302 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9303 		    rdev->pm.dpm_enabled) {
9304 			wm_high.yclk =
9305 				radeon_dpm_get_mclk(rdev, false) * 10;
9306 			wm_high.sclk =
9307 				radeon_dpm_get_sclk(rdev, false) * 10;
9308 		} else {
9309 			wm_high.yclk = rdev->pm.current_mclk * 10;
9310 			wm_high.sclk = rdev->pm.current_sclk * 10;
9311 		}
9312 
9313 		wm_high.disp_clk = mode->clock;
9314 		wm_high.src_width = mode->crtc_hdisplay;
9315 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9316 		wm_high.blank_time = line_time - wm_high.active_time;
9317 		wm_high.interlaced = false;
9318 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9319 			wm_high.interlaced = true;
9320 		wm_high.vsc = radeon_crtc->vsc;
9321 		wm_high.vtaps = 1;
9322 		if (radeon_crtc->rmx_type != RMX_OFF)
9323 			wm_high.vtaps = 2;
9324 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9325 		wm_high.lb_size = lb_size;
9326 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9327 		wm_high.num_heads = num_heads;
9328 
9329 		/* set for high clocks */
9330 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9331 
9332 		/* possibly force display priority to high */
9333 		/* should really do this at mode validation time... */
9334 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9335 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9336 		    !dce8_check_latency_hiding(&wm_high) ||
9337 		    (rdev->disp_priority == 2)) {
9338 			DRM_DEBUG_KMS("force priority to high\n");
9339 		}
9340 
9341 		/* watermark for low clocks */
9342 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9343 		    rdev->pm.dpm_enabled) {
9344 			wm_low.yclk =
9345 				radeon_dpm_get_mclk(rdev, true) * 10;
9346 			wm_low.sclk =
9347 				radeon_dpm_get_sclk(rdev, true) * 10;
9348 		} else {
9349 			wm_low.yclk = rdev->pm.current_mclk * 10;
9350 			wm_low.sclk = rdev->pm.current_sclk * 10;
9351 		}
9352 
9353 		wm_low.disp_clk = mode->clock;
9354 		wm_low.src_width = mode->crtc_hdisplay;
9355 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9356 		wm_low.blank_time = line_time - wm_low.active_time;
9357 		wm_low.interlaced = false;
9358 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9359 			wm_low.interlaced = true;
9360 		wm_low.vsc = radeon_crtc->vsc;
9361 		wm_low.vtaps = 1;
9362 		if (radeon_crtc->rmx_type != RMX_OFF)
9363 			wm_low.vtaps = 2;
9364 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9365 		wm_low.lb_size = lb_size;
9366 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9367 		wm_low.num_heads = num_heads;
9368 
9369 		/* set for low clocks */
9370 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9371 
9372 		/* possibly force display priority to high */
9373 		/* should really do this at mode validation time... */
9374 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9375 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9376 		    !dce8_check_latency_hiding(&wm_low) ||
9377 		    (rdev->disp_priority == 2)) {
9378 			DRM_DEBUG_KMS("force priority to high\n");
9379 		}
9380 
9381 		/* Save number of lines the linebuffer leads before the scanout */
9382 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9383 	}
9384 
9385 	/* select wm A */
9386 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9387 	tmp = wm_mask;
9388 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9389 	tmp |= LATENCY_WATERMARK_MASK(1);
9390 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9391 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9392 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9393 		LATENCY_HIGH_WATERMARK(line_time)));
9394 	/* select wm B */
9395 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9396 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9397 	tmp |= LATENCY_WATERMARK_MASK(2);
9398 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9399 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9400 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9401 		LATENCY_HIGH_WATERMARK(line_time)));
9402 	/* restore original selection */
9403 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9404 
9405 	/* save values for DPM */
9406 	radeon_crtc->line_time = line_time;
9407 	radeon_crtc->wm_high = latency_watermark_a;
9408 	radeon_crtc->wm_low = latency_watermark_b;
9409 }
9410 
9411 /**
9412  * dce8_bandwidth_update - program display watermarks
9413  *
9414  * @rdev: radeon_device pointer
9415  *
9416  * Calculate and program the display watermarks and line
9417  * buffer allocation (CIK).
9418  */
9419 void dce8_bandwidth_update(struct radeon_device *rdev)
9420 {
9421 	struct drm_display_mode *mode = NULL;
9422 	u32 num_heads = 0, lb_size;
9423 	int i;
9424 
9425 	if (!rdev->mode_info.mode_config_initialized)
9426 		return;
9427 
9428 	radeon_update_display_priority(rdev);
9429 
9430 	for (i = 0; i < rdev->num_crtc; i++) {
9431 		if (rdev->mode_info.crtcs[i]->base.enabled)
9432 			num_heads++;
9433 	}
9434 	for (i = 0; i < rdev->num_crtc; i++) {
9435 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9436 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9437 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9438 	}
9439 }
9440 
9441 /**
9442  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9443  *
9444  * @rdev: radeon_device pointer
9445  *
9446  * Fetches a GPU clock counter snapshot (CIK).
9447  * Returns the 64 bit clock counter snapshot.
9448  */
9449 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9450 {
9451 	uint64_t clock;
9452 
9453 	mutex_lock(&rdev->gpu_clock_mutex);
9454 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9455 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9456 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9457 	mutex_unlock(&rdev->gpu_clock_mutex);
9458 	return clock;
9459 }
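/*
 * Writing RLC_CAPTURE_GPU_CLOCK_COUNT in cik_get_gpu_clock_counter()
 * latches the free-running counter so the LSB/MSB pair reads back as
 * one consistent 64-bit value; the mutex serializes concurrent
 * captures. A minimal usage sketch (hypothetical, for illustration
 * only):
 *
 *	uint64_t t0, t1;
 *
 *	t0 = cik_get_gpu_clock_counter(rdev);
 *	mdelay(10);
 *	t1 = cik_get_gpu_clock_counter(rdev);
 *
 * (t1 - t0) * 100 then approximates the GPU clock rate in Hz.
 */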
9460 
9461 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9462 			     u32 cntl_reg, u32 status_reg)
9463 {
9464 	int r, i;
9465 	struct atom_clock_dividers dividers;
9466 	uint32_t tmp;
9467 
9468 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9469 					   clock, false, &dividers);
9470 	if (r)
9471 		return r;
9472 
9473 	tmp = RREG32_SMC(cntl_reg);
9474 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9475 	tmp |= dividers.post_divider;
9476 	WREG32_SMC(cntl_reg, tmp);
9477 
9478 	for (i = 0; i < 100; i++) {
9479 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9480 			break;
9481 		mdelay(10);
9482 	}
9483 	if (i == 100)
9484 		return -ETIMEDOUT;
9485 
9486 	return 0;
9487 }
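/*
 * The status poll in cik_set_uvd_clock() gives the SMC up to one second
 * (100 iterations of mdelay(10)) to assert DCLK_STATUS in the given
 * status register for the newly programmed post divider before giving
 * up with -ETIMEDOUT.
 */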
9488 
9489 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9490 {
9491 	int r = 0;
9492 
9493 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9494 	if (r)
9495 		return r;
9496 
9497 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9498 	return r;
9499 }
9500 
9501 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9502 {
9503 	int r, i;
9504 	struct atom_clock_dividers dividers;
9505 	u32 tmp;
9506 
9507 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9508 					   ecclk, false, &dividers);
9509 	if (r)
9510 		return r;
9511 
9512 	for (i = 0; i < 100; i++) {
9513 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9514 			break;
9515 		mdelay(10);
9516 	}
9517 	if (i == 100)
9518 		return -ETIMEDOUT;
9519 
9520 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9521 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9522 	tmp |= dividers.post_divider;
9523 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9524 
9525 	for (i = 0; i < 100; i++) {
9526 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9527 			break;
9528 		mdelay(10);
9529 	}
9530 	if (i == 100)
9531 		return -ETIMEDOUT;
9532 
9533 	return 0;
9534 }
9535 
9536 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9537 {
9538 	struct pci_dev *root = rdev->pdev->bus->self;
9539 	int bridge_pos, gpu_pos;
9540 	u32 speed_cntl, mask, current_data_rate;
9541 	int ret, i;
9542 	u16 tmp16;
9543 
9544 	if (pci_is_root_bus(rdev->pdev->bus))
9545 		return;
9546 
9547 	if (radeon_pcie_gen2 == 0)
9548 		return;
9549 
9550 	if (rdev->flags & RADEON_IS_IGP)
9551 		return;
9552 
9553 	if (!(rdev->flags & RADEON_IS_PCIE))
9554 		return;
9555 
9556 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9557 	if (ret != 0)
9558 		return;
9559 
9560 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9561 		return;
9562 
9563 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9564 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9565 		LC_CURRENT_DATA_RATE_SHIFT;
9566 	if (mask & DRM_PCIE_SPEED_80) {
9567 		if (current_data_rate == 2) {
9568 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9569 			return;
9570 		}
9571 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9572 	} else if (mask & DRM_PCIE_SPEED_50) {
9573 		if (current_data_rate == 1) {
9574 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9575 			return;
9576 		}
9577 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9578 	}
9579 
9580 	bridge_pos = pci_pcie_cap(root);
9581 	if (!bridge_pos)
9582 		return;
9583 
9584 	gpu_pos = pci_pcie_cap(rdev->pdev);
9585 	if (!gpu_pos)
9586 		return;
9587 
9588 	if (mask & DRM_PCIE_SPEED_80) {
9589 		/* re-try equalization if gen3 is not already enabled */
9590 		if (current_data_rate != 2) {
9591 			u16 bridge_cfg, gpu_cfg;
9592 			u16 bridge_cfg2, gpu_cfg2;
9593 			u32 max_lw, current_lw, tmp;
9594 
9595 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9596 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9597 
9598 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9599 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9600 
9601 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9602 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9603 
9604 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9605 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9606 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9607 
9608 			if (current_lw < max_lw) {
9609 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9610 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9611 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9612 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9613 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9614 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9615 				}
9616 			}
9617 
9618 			for (i = 0; i < 10; i++) {
9619 				/* check status */
9620 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9621 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9622 					break;
9623 
9624 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9625 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9626 
9627 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9628 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9629 
9630 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9631 				tmp |= LC_SET_QUIESCE;
9632 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9633 
9634 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9635 				tmp |= LC_REDO_EQ;
9636 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9637 
9638 				mdelay(100);
9639 
9640 				/* linkctl */
9641 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9642 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9643 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9644 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9645 
9646 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9647 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9648 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9649 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9650 
9651 				/* linkctl2 */
9652 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9653 				tmp16 &= ~((1 << 4) | (7 << 9));
9654 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9655 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9656 
9657 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9658 				tmp16 &= ~((1 << 4) | (7 << 9));
9659 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9660 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9661 
9662 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9663 				tmp &= ~LC_SET_QUIESCE;
9664 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9665 			}
9666 		}
9667 	}
9668 
9669 	/* set the link speed */
9670 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9671 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9672 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9673 
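	/* program the Target Link Speed field (LNKCTL2 bits 3:0):
	 * 1 = 2.5GT/s (gen1), 2 = 5.0GT/s (gen2), 3 = 8.0GT/s (gen3)
	 */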
9674 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9675 	tmp16 &= ~0xf;
9676 	if (mask & DRM_PCIE_SPEED_80)
9677 		tmp16 |= 3; /* gen3 */
9678 	else if (mask & DRM_PCIE_SPEED_50)
9679 		tmp16 |= 2; /* gen2 */
9680 	else
9681 		tmp16 |= 1; /* gen1 */
9682 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9683 
9684 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9685 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9686 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9687 
9688 	for (i = 0; i < rdev->usec_timeout; i++) {
9689 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9690 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9691 			break;
9692 		udelay(1);
9693 	}
9694 }
9695 
9696 static void cik_program_aspm(struct radeon_device *rdev)
9697 {
9698 	u32 data, orig;
9699 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9700 	bool disable_clkreq = false;
9701 
9702 	if (radeon_aspm == 0)
9703 		return;
9704 
9705 	/* XXX double check IGPs */
9706 	if (rdev->flags & RADEON_IS_IGP)
9707 		return;
9708 
9709 	if (!(rdev->flags & RADEON_IS_PCIE))
9710 		return;
9711 
9712 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9713 	data &= ~LC_XMIT_N_FTS_MASK;
9714 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9715 	if (orig != data)
9716 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9717 
9718 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9719 	data |= LC_GO_TO_RECOVERY;
9720 	if (orig != data)
9721 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9722 
9723 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9724 	data |= P_IGNORE_EDB_ERR;
9725 	if (orig != data)
9726 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9727 
9728 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9729 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9730 	data |= LC_PMI_TO_L1_DIS;
9731 	if (!disable_l0s)
9732 		data |= LC_L0S_INACTIVITY(7);
9733 
9734 	if (!disable_l1) {
9735 		data |= LC_L1_INACTIVITY(7);
9736 		data &= ~LC_PMI_TO_L1_DIS;
9737 		if (orig != data)
9738 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9739 
9740 		if (!disable_plloff_in_l1) {
9741 			bool clk_req_support;
9742 
9743 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9744 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9745 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9746 			if (orig != data)
9747 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9748 
9749 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9750 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9751 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9752 			if (orig != data)
9753 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9754 
9755 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9756 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9757 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9758 			if (orig != data)
9759 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9760 
9761 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9762 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9763 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9764 			if (orig != data)
9765 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9766 
9767 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9768 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9769 			data |= LC_DYN_LANES_PWR_STATE(3);
9770 			if (orig != data)
9771 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9772 
9773 			if (!disable_clkreq &&
9774 			    !pci_is_root_bus(rdev->pdev->bus)) {
9775 				struct pci_dev *root = rdev->pdev->bus->self;
9776 				u32 lnkcap;
9777 
9778 				clk_req_support = false;
9779 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9780 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9781 					clk_req_support = true;
9782 			} else {
9783 				clk_req_support = false;
9784 			}
9785 
9786 			if (clk_req_support) {
9787 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9788 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9789 				if (orig != data)
9790 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9791 
9792 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9793 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9794 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9795 				if (orig != data)
9796 					WREG32_SMC(THM_CLK_CNTL, data);
9797 
9798 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9799 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9800 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9801 				if (orig != data)
9802 					WREG32_SMC(MISC_CLK_CTRL, data);
9803 
9804 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9805 				data &= ~BCLK_AS_XCLK;
9806 				if (orig != data)
9807 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9808 
9809 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9810 				data &= ~FORCE_BIF_REFCLK_EN;
9811 				if (orig != data)
9812 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9813 
9814 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9815 				data &= ~MPLL_CLKOUT_SEL_MASK;
9816 				data |= MPLL_CLKOUT_SEL(4);
9817 				if (orig != data)
9818 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9819 			}
9820 		}
9821 	} else {
9822 		if (orig != data)
9823 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9824 	}
9825 
9826 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9827 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9828 	if (orig != data)
9829 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9830 
9831 	if (!disable_l0s) {
9832 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9833 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9834 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9835 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9836 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9837 				data &= ~LC_L0S_INACTIVITY_MASK;
9838 				if (orig != data)
9839 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9840 			}
9841 		}
9842 	}
9843 }
9844