/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

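/*
 * Firmware is listed under both the legacy (uppercase) and the current
 * (lowercase) naming schemes; cik_init_microcode() tries the new names
 * first and falls back to the legacy images if they are not present.
 */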
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 *
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

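/*
 * RLC save/restore register lists.  Each entry packs a GRBM_GFX_INDEX
 * selector in the upper 16 bits and a register dword offset in the
 * lower 16 bits, followed by a zeroed data slot; the bare values
 * (0x3, 0x5) appear to delimit sections of the list with a different
 * entry layout.
 */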
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

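/*
 * "Golden" register tables are {offset, and_mask, or_mask} triples
 * consumed by radeon_program_register_sequence(): the bits in and_mask
 * are cleared and or_mask is OR'd in; an and_mask of 0xffffffff writes
 * or_mask to the register directly.
 */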
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

#define BONAIRE_IO_MC_REGS_SIZE 36

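/*
 * MC ucode register tables: each pair is an {MC_SEQ_IO_DEBUG index, data}
 * tuple programmed through the MC_SEQ_IO_DEBUG_INDEX/DATA registers
 * alongside the MC ucode itself (see ci_mc_load_microcode() below).
 */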
1742 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1743 {
1744 	{0x00000070, 0x04400000},
1745 	{0x00000071, 0x80c01803},
1746 	{0x00000072, 0x00004004},
1747 	{0x00000073, 0x00000100},
1748 	{0x00000074, 0x00ff0000},
1749 	{0x00000075, 0x34000000},
1750 	{0x00000076, 0x08000014},
1751 	{0x00000077, 0x00cc08ec},
1752 	{0x00000078, 0x00000400},
1753 	{0x00000079, 0x00000000},
1754 	{0x0000007a, 0x04090000},
1755 	{0x0000007c, 0x00000000},
1756 	{0x0000007e, 0x4408a8e8},
1757 	{0x0000007f, 0x00000304},
1758 	{0x00000080, 0x00000000},
1759 	{0x00000082, 0x00000001},
1760 	{0x00000083, 0x00000002},
1761 	{0x00000084, 0xf3e4f400},
1762 	{0x00000085, 0x052024e3},
1763 	{0x00000087, 0x00000000},
1764 	{0x00000088, 0x01000000},
1765 	{0x0000008a, 0x1c0a0000},
1766 	{0x0000008b, 0xff010000},
1767 	{0x0000008d, 0xffffefff},
1768 	{0x0000008e, 0xfff3efff},
1769 	{0x0000008f, 0xfff3efbf},
1770 	{0x00000092, 0xf7ffffff},
1771 	{0x00000093, 0xffffff7f},
1772 	{0x00000095, 0x00101101},
1773 	{0x00000096, 0x00000fff},
1774 	{0x00000097, 0x00116fff},
1775 	{0x00000098, 0x60010000},
1776 	{0x00000099, 0x10010000},
1777 	{0x0000009a, 0x00006000},
1778 	{0x0000009b, 0x00001000},
1779 	{0x0000009f, 0x00b48000}
1780 };
1781 
1782 #define HAWAII_IO_MC_REGS_SIZE 22
1783 
1784 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1785 {
1786 	{0x0000007d, 0x40000000},
1787 	{0x0000007e, 0x40180304},
1788 	{0x0000007f, 0x0000ff00},
1789 	{0x00000081, 0x00000000},
1790 	{0x00000083, 0x00000800},
1791 	{0x00000086, 0x00000000},
1792 	{0x00000087, 0x00000100},
1793 	{0x00000088, 0x00020100},
1794 	{0x00000089, 0x00000000},
1795 	{0x0000008b, 0x00040000},
1796 	{0x0000008c, 0x00000100},
1797 	{0x0000008e, 0xff010000},
1798 	{0x00000090, 0xffffefff},
1799 	{0x00000091, 0xfff3efff},
1800 	{0x00000092, 0xfff3efbf},
1801 	{0x00000093, 0xf7ffffff},
1802 	{0x00000094, 0xffffff7f},
1803 	{0x00000095, 0x00000fff},
1804 	{0x00000096, 0x00116fff},
1805 	{0x00000097, 0x60010000},
1806 	{0x00000098, 0x10010000},
1807 	{0x0000009f, 0x00c79000}
1808 };
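/*
 * Each row in the two tables above is an {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pair; ci_mc_load_microcode() below walks the
 * selected table and writes the index register, then the data
 * register, for every entry.
 */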
1809 
1810 
1811 /**
1812  * cik_srbm_select - select specific register instances
1813  *
1814  * @rdev: radeon_device pointer
1815  * @me: selected ME (micro engine)
1816  * @pipe: pipe
1817  * @queue: queue
1818  * @vmid: VMID
1819  *
1820  * Switches the currently active registers instances.  Some
1821  * registers are instanced per VMID, others are instanced per
1822  * me/pipe/queue combination.
1823  */
1824 static void cik_srbm_select(struct radeon_device *rdev,
1825 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1826 {
1827 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1828 			     MEID(me & 0x3) |
1829 			     VMID(vmid & 0xf) |
1830 			     QUEUEID(queue & 0x7));
1831 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1832 }
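/*
 * Typical pattern (sketch): select an instance, program its registers,
 * then restore the default instance -- all under rdev->srbm_mutex so
 * concurrent users don't race on SRBM_GFX_CNTL:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program per-instance (e.g. HQD) registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */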
1833 
1834 /* ucode loading */
1835 /**
1836  * ci_mc_load_microcode - load MC ucode into the hw
1837  *
1838  * @rdev: radeon_device pointer
1839  *
1840  * Load the GDDR MC ucode into the hw (CIK).
1841  * Returns 0 on success, error on failure.
1842  */
1843 int ci_mc_load_microcode(struct radeon_device *rdev)
1844 {
1845 	const __be32 *fw_data = NULL;
1846 	const __le32 *new_fw_data = NULL;
1847 	u32 running, blackout = 0, tmp;
1848 	u32 running, tmp;
1849 	const __le32 *new_io_mc_regs = NULL;
1850 	int i, regs_size, ucode_size;
1851 
1852 	if (!rdev->mc_fw)
1853 		return -EINVAL;
1854 
1855 	if (rdev->new_fw) {
1856 		const struct mc_firmware_header_v1_0 *hdr =
1857 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1858 
1859 		radeon_ucode_print_mc_hdr(&hdr->header);
1860 
1861 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1862 		new_io_mc_regs = (const __le32 *)
1863 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1864 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1865 		new_fw_data = (const __le32 *)
1866 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1867 	} else {
1868 		ucode_size = rdev->mc_fw->size / 4;
1869 
1870 		switch (rdev->family) {
1871 		case CHIP_BONAIRE:
1872 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1873 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1874 			break;
1875 		case CHIP_HAWAII:
1876 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1877 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1878 			break;
1879 		default:
1880 			return -EINVAL;
1881 		}
1882 		fw_data = (const __be32 *)rdev->mc_fw->data;
1883 	}
1884 
1885 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1886 
1887 	if (running == 0) {
1893 		/* reset the engine and set to writable */
1894 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1895 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1896 
1897 		/* load mc io regs */
1898 		for (i = 0; i < regs_size; i++) {
1899 			if (rdev->new_fw) {
1900 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1901 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1902 			} else {
1903 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1904 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1905 			}
1906 		}
1907 
1908 		tmp = RREG32(MC_SEQ_MISC0);
1909 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1910 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1911 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1912 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1913 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1914 		}
1915 
1916 		/* load the MC ucode */
1917 		for (i = 0; i < ucode_size; i++) {
1918 			if (rdev->new_fw)
1919 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1920 			else
1921 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1922 		}
1923 
1924 		/* put the engine back into the active state */
1925 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1926 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1927 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1928 
1929 		/* wait for training to complete */
1930 		for (i = 0; i < rdev->usec_timeout; i++) {
1931 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1932 				break;
1933 			udelay(1);
1934 		}
1935 		for (i = 0; i < rdev->usec_timeout; i++) {
1936 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1937 				break;
1938 			udelay(1);
1939 		}
1943 	}
1944 
1945 	return 0;
1946 }
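/*
 * Caller sketch: the startup path loads the MC ucode before bringing up
 * the memory controller and bails on failure, mirroring how
 * cik_startup() uses this on dGPUs:
 *
 *	if (!(rdev->flags & RADEON_IS_IGP)) {
 *		r = ci_mc_load_microcode(rdev);
 *		if (r) {
 *			DRM_ERROR("Failed to load MC firmware!\n");
 *			return r;
 *		}
 *	}
 */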
1947 
1948 /**
1949  * cik_init_microcode - load ucode images from disk
1950  *
1951  * @rdev: radeon_device pointer
1952  *
1953  * Use the firmware interface to load the ucode images into
1954  * the driver (not loaded into hw).
1955  * Returns 0 on success, error on failure.
1956  */
1957 static int cik_init_microcode(struct radeon_device *rdev)
1958 {
1959 	const char *chip_name;
1960 	const char *new_chip_name;
1961 	size_t pfp_req_size, me_req_size, ce_req_size,
1962 		mec_req_size, rlc_req_size, mc_req_size = 0,
1963 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1964 	char fw_name[30];
1965 	int new_fw = 0;
1966 	int err;
1967 	int num_fw;
1968 
1969 	DRM_DEBUG("\n");
1970 
1971 	switch (rdev->family) {
1972 	case CHIP_BONAIRE:
1973 		chip_name = "BONAIRE";
1974 		new_chip_name = "bonaire";
1975 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1976 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1977 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1978 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1979 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1980 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1981 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1982 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1983 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1984 		num_fw = 8;
1985 		break;
1986 	case CHIP_HAWAII:
1987 		chip_name = "HAWAII";
1988 		new_chip_name = "hawaii";
1989 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1991 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1995 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1996 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1998 		num_fw = 8;
1999 		break;
2000 	case CHIP_KAVERI:
2001 		chip_name = "KAVERI";
2002 		new_chip_name = "kaveri";
2003 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2004 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2005 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2006 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2007 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2008 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2009 		num_fw = 7;
2010 		break;
2011 	case CHIP_KABINI:
2012 		chip_name = "KABINI";
2013 		new_chip_name = "kabini";
2014 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2019 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2020 		num_fw = 6;
2021 		break;
2022 	case CHIP_MULLINS:
2023 		chip_name = "MULLINS";
2024 		new_chip_name = "mullins";
2025 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2026 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2027 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2028 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2029 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2030 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2031 		num_fw = 6;
2032 		break;
2033 	default:
		BUG();
2034 	}
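	/*
	 * num_fw counts how many new-style (header + validated) images
	 * this asic ships; the new_fw tally below must either stay at
	 * zero (all legacy) or reach num_fw, otherwise we refuse to mix
	 * firmware generations.
	 */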
2035 
2036 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2037 
2038 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2039 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2040 	if (err) {
2041 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2042 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2043 		if (err)
2044 			goto out;
2045 		if (rdev->pfp_fw->size != pfp_req_size) {
2046 			printk(KERN_ERR
2047 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2048 			       rdev->pfp_fw->size, fw_name);
2049 			err = -EINVAL;
2050 			goto out;
2051 		}
2052 	} else {
2053 		err = radeon_ucode_validate(rdev->pfp_fw);
2054 		if (err) {
2055 			printk(KERN_ERR
2056 			       "cik_fw: validation failed for firmware \"%s\"\n",
2057 			       fw_name);
2058 			goto out;
2059 		} else {
2060 			new_fw++;
2061 		}
2062 	}
2063 
2064 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2065 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2066 	if (err) {
2067 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2068 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2069 		if (err)
2070 			goto out;
2071 		if (rdev->me_fw->size != me_req_size) {
2072 			printk(KERN_ERR
2073 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2074 			       rdev->me_fw->size, fw_name);
2075 			err = -EINVAL;
			goto out;
2076 		}
2077 	} else {
2078 		err = radeon_ucode_validate(rdev->me_fw);
2079 		if (err) {
2080 			printk(KERN_ERR
2081 			       "cik_fw: validation failed for firmware \"%s\"\n",
2082 			       fw_name);
2083 			goto out;
2084 		} else {
2085 			new_fw++;
2086 		}
2087 	}
2088 
2089 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2090 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2093 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->ce_fw->size != ce_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->ce_fw->size, fw_name);
2100 			err = -EINVAL;
			goto out;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->ce_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2115 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2118 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->mec_fw->size != mec_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->mec_fw->size, fw_name);
2125 			err = -EINVAL;
			goto out;
2126 		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->mec_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	if (rdev->family == CHIP_KAVERI) {
2140 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2141 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2142 		if (err) {
2143 			goto out;
2144 		} else {
2145 			err = radeon_ucode_validate(rdev->mec2_fw);
2146 			if (err) {
2147 				goto out;
2148 			} else {
2149 				new_fw++;
2150 			}
2151 		}
2152 	}
2153 
2154 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2155 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2156 	if (err) {
2157 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2158 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2159 		if (err)
2160 			goto out;
2161 		if (rdev->rlc_fw->size != rlc_req_size) {
2162 			printk(KERN_ERR
2163 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2164 			       rdev->rlc_fw->size, fw_name);
2165 			err = -EINVAL;
			goto out;
2166 		}
2167 	} else {
2168 		err = radeon_ucode_validate(rdev->rlc_fw);
2169 		if (err) {
2170 			printk(KERN_ERR
2171 			       "cik_fw: validation failed for firmware \"%s\"\n",
2172 			       fw_name);
2173 			goto out;
2174 		} else {
2175 			new_fw++;
2176 		}
2177 	}
2178 
2179 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2180 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2181 	if (err) {
2182 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2183 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2184 		if (err)
2185 			goto out;
2186 		if (rdev->sdma_fw->size != sdma_req_size) {
2187 			printk(KERN_ERR
2188 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2189 			       rdev->sdma_fw->size, fw_name);
2190 			err = -EINVAL;
			goto out;
2191 		}
2192 	} else {
2193 		err = radeon_ucode_validate(rdev->sdma_fw);
2194 		if (err) {
2195 			printk(KERN_ERR
2196 			       "cik_fw: validation failed for firmware \"%s\"\n",
2197 			       fw_name);
2198 			goto out;
2199 		} else {
2200 			new_fw++;
2201 		}
2202 	}
2203 
2204 	/* No SMC, MC ucode on APUs */
2205 	if (!(rdev->flags & RADEON_IS_IGP)) {
2206 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2207 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2208 		if (err) {
2209 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2210 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2211 			if (err) {
2212 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2213 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2214 				if (err)
2215 					goto out;
2216 			}
2217 			if ((rdev->mc_fw->size != mc_req_size) &&
2218 			    (rdev->mc_fw->size != mc2_req_size)) {
2219 				printk(KERN_ERR
2220 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2221 				       rdev->mc_fw->size, fw_name);
2222 				err = -EINVAL;
				goto out;
2223 			}
2224 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2225 		} else {
2226 			err = radeon_ucode_validate(rdev->mc_fw);
2227 			if (err) {
2228 				printk(KERN_ERR
2229 				       "cik_fw: validation failed for firmware \"%s\"\n",
2230 				       fw_name);
2231 				goto out;
2232 			} else {
2233 				new_fw++;
2234 			}
2235 		}
2236 
2237 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2238 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2239 		if (err) {
2240 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2241 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2242 			if (err) {
2243 				printk(KERN_ERR
2244 				       "smc: error loading firmware \"%s\"\n",
2245 				       fw_name);
2246 				release_firmware(rdev->smc_fw);
2247 				rdev->smc_fw = NULL;
2248 				err = 0;
2249 			} else if (rdev->smc_fw->size != smc_req_size) {
2250 				printk(KERN_ERR
2251 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2252 				       rdev->smc_fw->size, fw_name);
2253 				err = -EINVAL;
2254 			}
2255 		} else {
2256 			err = radeon_ucode_validate(rdev->smc_fw);
2257 			if (err) {
2258 				printk(KERN_ERR
2259 				       "cik_fw: validation failed for firmware \"%s\"\n",
2260 				       fw_name);
2261 				goto out;
2262 			} else {
2263 				new_fw++;
2264 			}
2265 		}
2266 	}
2267 
2268 	if (new_fw == 0) {
2269 		rdev->new_fw = false;
2270 	} else if (new_fw < num_fw) {
2271 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2272 		err = -EINVAL;
2273 	} else {
2274 		rdev->new_fw = true;
2275 	}
2276 
2277 out:
2278 	if (err) {
2279 		if (err != -EINVAL)
2280 			printk(KERN_ERR
2281 			       "cik_cp: Failed to load firmware \"%s\"\n",
2282 			       fw_name);
2283 		release_firmware(rdev->pfp_fw);
2284 		rdev->pfp_fw = NULL;
2285 		release_firmware(rdev->me_fw);
2286 		rdev->me_fw = NULL;
2287 		release_firmware(rdev->ce_fw);
2288 		rdev->ce_fw = NULL;
2289 		release_firmware(rdev->mec_fw);
2290 		rdev->mec_fw = NULL;
2291 		release_firmware(rdev->mec2_fw);
2292 		rdev->mec2_fw = NULL;
2293 		release_firmware(rdev->rlc_fw);
2294 		rdev->rlc_fw = NULL;
2295 		release_firmware(rdev->sdma_fw);
2296 		rdev->sdma_fw = NULL;
2297 		release_firmware(rdev->mc_fw);
2298 		rdev->mc_fw = NULL;
2299 		release_firmware(rdev->smc_fw);
2300 		rdev->smc_fw = NULL;
2301 	}
2302 	return err;
2303 }
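/*
 * The request -> validate -> legacy-fallback dance above repeats once
 * per ucode image.  A hypothetical helper (editor's sketch only; the
 * name, the #if 0 guard and the parameter list are inventions, not
 * part of the driver) factoring out the common shape:
 */
#if 0
static int cik_request_one_fw(struct radeon_device *rdev,
			      const struct firmware **fw,
			      const char *new_name, const char *old_name,
			      const char *suffix, size_t legacy_req_size,
			      int *new_fw)
{
	char fw_name[30];
	int err;

	/* prefer the new-style, self-describing image */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_%s.bin", new_name, suffix);
	err = request_firmware(fw, fw_name, rdev->dev);
	if (!err) {
		err = radeon_ucode_validate(*fw);
		if (!err)
			(*new_fw)++;
		return err;
	}
	/* fall back to the legacy image, checked only by size */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_%s.bin", old_name, suffix);
	err = request_firmware(fw, fw_name, rdev->dev);
	if (err)
		return err;
	return ((*fw)->size == legacy_req_size) ? 0 : -EINVAL;
}
#endif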
2304 
2305 /*
2306  * Core functions
2307  */
2308 /**
2309  * cik_tiling_mode_table_init - init the hw tiling table
2310  *
2311  * @rdev: radeon_device pointer
2312  *
2313  * Starting with SI, the tiling setup is done globally in a
2314  * set of 32 tiling modes.  Rather than selecting each set of
2315  * parameters per surface as on older asics, we just select
2316  * which index in the tiling table we want to use, and the
2317  * surface uses those parameters (CIK).
2318  */
2319 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2320 {
2321 	const u32 num_tile_mode_states = 32;
2322 	const u32 num_secondary_tile_mode_states = 16;
2323 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2324 	u32 num_pipe_configs;
2325 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2326 		rdev->config.cik.max_shader_engines;
2327 
2328 	switch (rdev->config.cik.mem_row_size_in_kb) {
2329 	case 1:
2330 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2331 		break;
2332 	case 2:
2333 	default:
2334 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2335 		break;
2336 	case 4:
2337 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2338 		break;
2339 	}
2340 
2341 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2342 	if (num_pipe_configs > 8)
2343 		num_pipe_configs = 16;
2344 
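	/*
	 * Only 2/4/8/16 pipe configs are programmed below; more than 8
	 * tile pipes (Hawaii) selects the 16-pipe P16 tables.  In those
	 * tables, TILE_SPLIT(split_equal_to_row_size) sets the tile
	 * split equal to the DRAM row size chosen above (1/2/4 KB).
	 */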
2345 	if (num_pipe_configs == 16) {
2346 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2347 			switch (reg_offset) {
2348 			case 0:
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2350 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2351 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2353 				break;
2354 			case 1:
2355 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2357 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2359 				break;
2360 			case 2:
2361 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2365 				break;
2366 			case 3:
2367 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2368 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2369 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2371 				break;
2372 			case 4:
2373 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376 						 TILE_SPLIT(split_equal_to_row_size));
2377 				break;
2378 			case 5:
2379 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2380 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382 				break;
2383 			case 6:
2384 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2388 				break;
2389 			case 7:
2390 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2391 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2392 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393 						 TILE_SPLIT(split_equal_to_row_size));
2394 				break;
2395 			case 8:
2396 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2397 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2398 				break;
2399 			case 9:
2400 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2401 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2403 				break;
2404 			case 10:
2405 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2407 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409 				break;
2410 			case 11:
2411 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2412 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2413 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2414 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415 				break;
2416 			case 12:
2417 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2418 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 				break;
2422 			case 13:
2423 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2424 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2426 				break;
2427 			case 14:
2428 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 				break;
2433 			case 16:
2434 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2436 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2437 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 				break;
2439 			case 17:
2440 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 				break;
2445 			case 27:
2446 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2449 				break;
2450 			case 28:
2451 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455 				break;
2456 			case 29:
2457 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2458 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2459 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2460 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461 				break;
2462 			case 30:
2463 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2464 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2465 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467 				break;
2468 			default:
2469 				gb_tile_moden = 0;
2470 				break;
2471 			}
2472 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2473 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2474 		}
2475 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2476 			switch (reg_offset) {
2477 			case 0:
2478 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481 						 NUM_BANKS(ADDR_SURF_16_BANK));
2482 				break;
2483 			case 1:
2484 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2486 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487 						 NUM_BANKS(ADDR_SURF_16_BANK));
2488 				break;
2489 			case 2:
2490 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493 						 NUM_BANKS(ADDR_SURF_16_BANK));
2494 				break;
2495 			case 3:
2496 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499 						 NUM_BANKS(ADDR_SURF_16_BANK));
2500 				break;
2501 			case 4:
2502 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505 						 NUM_BANKS(ADDR_SURF_8_BANK));
2506 				break;
2507 			case 5:
2508 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511 						 NUM_BANKS(ADDR_SURF_4_BANK));
2512 				break;
2513 			case 6:
2514 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2517 						 NUM_BANKS(ADDR_SURF_2_BANK));
2518 				break;
2519 			case 8:
2520 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523 						 NUM_BANKS(ADDR_SURF_16_BANK));
2524 				break;
2525 			case 9:
2526 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529 						 NUM_BANKS(ADDR_SURF_16_BANK));
2530 				break;
2531 			case 10:
2532 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2535 						 NUM_BANKS(ADDR_SURF_16_BANK));
2536 				break;
2537 			case 11:
2538 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2541 						 NUM_BANKS(ADDR_SURF_8_BANK));
2542 				break;
2543 			case 12:
2544 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2546 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2547 						 NUM_BANKS(ADDR_SURF_4_BANK));
2548 				break;
2549 			case 13:
2550 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553 						 NUM_BANKS(ADDR_SURF_2_BANK));
2554 				break;
2555 			case 14:
2556 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2558 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2559 						 NUM_BANKS(ADDR_SURF_2_BANK));
2560 				break;
2561 			default:
2562 				gb_tile_moden = 0;
2563 				break;
2564 			}
2565 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2566 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2567 		}
2568 	} else if (num_pipe_configs == 8) {
2569 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2570 			switch (reg_offset) {
2571 			case 0:
2572 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2574 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2576 				break;
2577 			case 1:
2578 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2580 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2582 				break;
2583 			case 2:
2584 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2586 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2588 				break;
2589 			case 3:
2590 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2592 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2594 				break;
2595 			case 4:
2596 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2598 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599 						 TILE_SPLIT(split_equal_to_row_size));
2600 				break;
2601 			case 5:
2602 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2603 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2605 				break;
2606 			case 6:
2607 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2608 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2609 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2611 				break;
2612 			case 7:
2613 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2614 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2615 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616 						 TILE_SPLIT(split_equal_to_row_size));
2617 				break;
2618 			case 8:
2619 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2620 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2621 				break;
2622 			case 9:
2623 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2626 				break;
2627 			case 10:
2628 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632 				break;
2633 			case 11:
2634 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2635 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2636 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2637 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2638 				break;
2639 			case 12:
2640 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2641 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2642 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644 				break;
2645 			case 13:
2646 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2647 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2649 				break;
2650 			case 14:
2651 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2654 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655 				break;
2656 			case 16:
2657 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2658 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2659 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2660 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661 				break;
2662 			case 17:
2663 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2664 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2666 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667 				break;
2668 			case 27:
2669 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2672 				break;
2673 			case 28:
2674 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678 				break;
2679 			case 29:
2680 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2681 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2682 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2683 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684 				break;
2685 			case 30:
2686 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2687 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2688 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2689 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2690 				break;
2691 			default:
2692 				gb_tile_moden = 0;
2693 				break;
2694 			}
2695 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2696 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2697 		}
2698 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2699 			switch (reg_offset) {
2700 			case 0:
2701 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2703 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2704 						 NUM_BANKS(ADDR_SURF_16_BANK));
2705 				break;
2706 			case 1:
2707 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2709 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2710 						 NUM_BANKS(ADDR_SURF_16_BANK));
2711 				break;
2712 			case 2:
2713 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2715 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2716 						 NUM_BANKS(ADDR_SURF_16_BANK));
2717 				break;
2718 			case 3:
2719 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2722 						 NUM_BANKS(ADDR_SURF_16_BANK));
2723 				break;
2724 			case 4:
2725 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2728 						 NUM_BANKS(ADDR_SURF_8_BANK));
2729 				break;
2730 			case 5:
2731 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2734 						 NUM_BANKS(ADDR_SURF_4_BANK));
2735 				break;
2736 			case 6:
2737 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2740 						 NUM_BANKS(ADDR_SURF_2_BANK));
2741 				break;
2742 			case 8:
2743 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2745 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746 						 NUM_BANKS(ADDR_SURF_16_BANK));
2747 				break;
2748 			case 9:
2749 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2752 						 NUM_BANKS(ADDR_SURF_16_BANK));
2753 				break;
2754 			case 10:
2755 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2757 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2758 						 NUM_BANKS(ADDR_SURF_16_BANK));
2759 				break;
2760 			case 11:
2761 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2764 						 NUM_BANKS(ADDR_SURF_16_BANK));
2765 				break;
2766 			case 12:
2767 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2770 						 NUM_BANKS(ADDR_SURF_8_BANK));
2771 				break;
2772 			case 13:
2773 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2776 						 NUM_BANKS(ADDR_SURF_4_BANK));
2777 				break;
2778 			case 14:
2779 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2782 						 NUM_BANKS(ADDR_SURF_2_BANK));
2783 				break;
2784 			default:
2785 				gb_tile_moden = 0;
2786 				break;
2787 			}
2788 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2789 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2790 		}
2791 	} else if (num_pipe_configs == 4) {
2792 		if (num_rbs == 4) {
2793 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2794 				switch (reg_offset) {
2795 				case 0:
2796 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2800 					break;
2801 				case 1:
2802 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2806 					break;
2807 				case 2:
2808 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2812 					break;
2813 				case 3:
2814 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2816 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2818 					break;
2819 				case 4:
2820 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2822 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823 							 TILE_SPLIT(split_equal_to_row_size));
2824 					break;
2825 				case 5:
2826 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2827 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829 					break;
2830 				case 6:
2831 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2832 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2833 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2835 					break;
2836 				case 7:
2837 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2838 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2839 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2840 							 TILE_SPLIT(split_equal_to_row_size));
2841 					break;
2842 				case 8:
2843 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2844 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2845 					break;
2846 				case 9:
2847 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2850 					break;
2851 				case 10:
2852 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2853 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856 					break;
2857 				case 11:
2858 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2859 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2860 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2861 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2862 					break;
2863 				case 12:
2864 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2866 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868 					break;
2869 				case 13:
2870 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2871 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2873 					break;
2874 				case 14:
2875 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2877 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879 					break;
2880 				case 16:
2881 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2882 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2883 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2884 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885 					break;
2886 				case 17:
2887 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2888 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2889 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891 					break;
2892 				case 27:
2893 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2894 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2895 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2896 					break;
2897 				case 28:
2898 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2899 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2902 					break;
2903 				case 29:
2904 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2905 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908 					break;
2909 				case 30:
2910 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2911 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2914 					break;
2915 				default:
2916 					gb_tile_moden = 0;
2917 					break;
2918 				}
2919 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2920 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2921 			}
2922 		} else if (num_rbs < 4) {
2923 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2924 				switch (reg_offset) {
2925 				case 0:
2926 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2927 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2928 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2929 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2930 					break;
2931 				case 1:
2932 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2935 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2936 					break;
2937 				case 2:
2938 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2940 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2941 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2942 					break;
2943 				case 3:
2944 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2946 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2947 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2948 					break;
2949 				case 4:
2950 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2951 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2952 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2953 							 TILE_SPLIT(split_equal_to_row_size));
2954 					break;
2955 				case 5:
2956 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2957 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2958 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2959 					break;
2960 				case 6:
2961 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2962 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2963 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2964 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2965 					break;
2966 				case 7:
2967 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2968 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2969 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2970 							 TILE_SPLIT(split_equal_to_row_size));
2971 					break;
2972 				case 8:
2973 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2974 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2975 					break;
2976 				case 9:
2977 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2979 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2980 					break;
2981 				case 10:
2982 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2984 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2985 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986 					break;
2987 				case 11:
2988 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2989 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2990 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2992 					break;
2993 				case 12:
2994 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2996 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998 					break;
2999 				case 13:
3000 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3001 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3002 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3003 					break;
3004 				case 14:
3005 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3007 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3008 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009 					break;
3010 				case 16:
3011 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3012 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3013 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3014 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015 					break;
3016 				case 17:
3017 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3018 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3019 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3020 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021 					break;
3022 				case 27:
3023 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3024 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3025 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3026 					break;
3027 				case 28:
3028 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3029 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3030 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3031 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032 					break;
3033 				case 29:
3034 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3035 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3036 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3037 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3038 					break;
3039 				case 30:
3040 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3041 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3042 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3043 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3044 					break;
3045 				default:
3046 					gb_tile_moden = 0;
3047 					break;
3048 				}
3049 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3050 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3051 			}
3052 		}
3053 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3054 			switch (reg_offset) {
3055 			case 0:
3056 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3058 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059 						 NUM_BANKS(ADDR_SURF_16_BANK));
3060 				break;
3061 			case 1:
3062 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3064 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065 						 NUM_BANKS(ADDR_SURF_16_BANK));
3066 				break;
3067 			case 2:
3068 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3070 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071 						 NUM_BANKS(ADDR_SURF_16_BANK));
3072 				break;
3073 			case 3:
3074 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077 						 NUM_BANKS(ADDR_SURF_16_BANK));
3078 				break;
3079 			case 4:
3080 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3083 						 NUM_BANKS(ADDR_SURF_16_BANK));
3084 				break;
3085 			case 5:
3086 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3089 						 NUM_BANKS(ADDR_SURF_8_BANK));
3090 				break;
3091 			case 6:
3092 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3094 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3095 						 NUM_BANKS(ADDR_SURF_4_BANK));
3096 				break;
3097 			case 8:
3098 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3099 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3100 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3101 						 NUM_BANKS(ADDR_SURF_16_BANK));
3102 				break;
3103 			case 9:
3104 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3105 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3106 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3107 						 NUM_BANKS(ADDR_SURF_16_BANK));
3108 				break;
3109 			case 10:
3110 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3112 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3113 						 NUM_BANKS(ADDR_SURF_16_BANK));
3114 				break;
3115 			case 11:
3116 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3118 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3119 						 NUM_BANKS(ADDR_SURF_16_BANK));
3120 				break;
3121 			case 12:
3122 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3124 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3125 						 NUM_BANKS(ADDR_SURF_16_BANK));
3126 				break;
3127 			case 13:
3128 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3130 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3131 						 NUM_BANKS(ADDR_SURF_8_BANK));
3132 				break;
3133 			case 14:
3134 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3136 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3137 						 NUM_BANKS(ADDR_SURF_4_BANK));
3138 				break;
3139 			default:
3140 				gb_tile_moden = 0;
3141 				break;
3142 			}
3143 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3144 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3145 		}
3146 	} else if (num_pipe_configs == 2) {
3147 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3148 			switch (reg_offset) {
3149 			case 0:
3150 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3151 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3152 						 PIPE_CONFIG(ADDR_SURF_P2) |
3153 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3154 				break;
3155 			case 1:
3156 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3157 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3158 						 PIPE_CONFIG(ADDR_SURF_P2) |
3159 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3160 				break;
3161 			case 2:
3162 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3163 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3164 						 PIPE_CONFIG(ADDR_SURF_P2) |
3165 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3166 				break;
3167 			case 3:
3168 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3169 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3170 						 PIPE_CONFIG(ADDR_SURF_P2) |
3171 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3172 				break;
3173 			case 4:
3174 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3175 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3176 						 PIPE_CONFIG(ADDR_SURF_P2) |
3177 						 TILE_SPLIT(split_equal_to_row_size));
3178 				break;
3179 			case 5:
3180 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3181 						 PIPE_CONFIG(ADDR_SURF_P2) |
3182 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3183 				break;
3184 			case 6:
3185 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3186 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3187 						 PIPE_CONFIG(ADDR_SURF_P2) |
3188 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3189 				break;
3190 			case 7:
3191 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3192 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3193 						 PIPE_CONFIG(ADDR_SURF_P2) |
3194 						 TILE_SPLIT(split_equal_to_row_size));
3195 				break;
3196 			case 8:
3197 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3198 						 PIPE_CONFIG(ADDR_SURF_P2));
3199 				break;
3200 			case 9:
3201 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3202 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3203 						 PIPE_CONFIG(ADDR_SURF_P2));
3204 				break;
3205 			case 10:
3206 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3207 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3208 						 PIPE_CONFIG(ADDR_SURF_P2) |
3209 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210 				break;
3211 			case 11:
3212 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3213 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3214 						 PIPE_CONFIG(ADDR_SURF_P2) |
3215 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3216 				break;
3217 			case 12:
3218 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3220 						 PIPE_CONFIG(ADDR_SURF_P2) |
3221 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222 				break;
3223 			case 13:
3224 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3225 						 PIPE_CONFIG(ADDR_SURF_P2) |
3226 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3227 				break;
3228 			case 14:
3229 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3231 						 PIPE_CONFIG(ADDR_SURF_P2) |
3232 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233 				break;
3234 			case 16:
3235 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3236 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3237 						 PIPE_CONFIG(ADDR_SURF_P2) |
3238 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3239 				break;
3240 			case 17:
3241 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3242 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3243 						 PIPE_CONFIG(ADDR_SURF_P2) |
3244 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245 				break;
3246 			case 27:
3247 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3248 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3249 						 PIPE_CONFIG(ADDR_SURF_P2));
3250 				break;
3251 			case 28:
3252 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3253 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254 						 PIPE_CONFIG(ADDR_SURF_P2) |
3255 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256 				break;
3257 			case 29:
3258 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3259 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3260 						 PIPE_CONFIG(ADDR_SURF_P2) |
3261 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262 				break;
3263 			case 30:
3264 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3265 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3266 						 PIPE_CONFIG(ADDR_SURF_P2) |
3267 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3268 				break;
3269 			default:
3270 				gb_tile_moden = 0;
3271 				break;
3272 			}
3273 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3274 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3275 		}
3276 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3277 			switch (reg_offset) {
3278 			case 0:
3279 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3280 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3281 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282 						 NUM_BANKS(ADDR_SURF_16_BANK));
3283 				break;
3284 			case 1:
3285 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3286 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3287 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3288 						 NUM_BANKS(ADDR_SURF_16_BANK));
3289 				break;
3290 			case 2:
3291 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3293 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3294 						 NUM_BANKS(ADDR_SURF_16_BANK));
3295 				break;
3296 			case 3:
3297 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3298 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3299 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300 						 NUM_BANKS(ADDR_SURF_16_BANK));
3301 				break;
3302 			case 4:
3303 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3304 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3305 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3306 						 NUM_BANKS(ADDR_SURF_16_BANK));
3307 				break;
3308 			case 5:
3309 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3311 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312 						 NUM_BANKS(ADDR_SURF_16_BANK));
3313 				break;
3314 			case 6:
3315 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3316 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3317 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3318 						 NUM_BANKS(ADDR_SURF_8_BANK));
3319 				break;
3320 			case 8:
3321 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3322 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3323 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3324 						 NUM_BANKS(ADDR_SURF_16_BANK));
3325 				break;
3326 			case 9:
3327 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3328 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3329 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330 						 NUM_BANKS(ADDR_SURF_16_BANK));
3331 				break;
3332 			case 10:
3333 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3334 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3335 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3336 						 NUM_BANKS(ADDR_SURF_16_BANK));
3337 				break;
3338 			case 11:
3339 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3340 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3341 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3342 						 NUM_BANKS(ADDR_SURF_16_BANK));
3343 				break;
3344 			case 12:
3345 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3346 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3347 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348 						 NUM_BANKS(ADDR_SURF_16_BANK));
3349 				break;
3350 			case 13:
3351 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3352 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3353 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354 						 NUM_BANKS(ADDR_SURF_16_BANK));
3355 				break;
3356 			case 14:
3357 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3360 						 NUM_BANKS(ADDR_SURF_8_BANK));
3361 				break;
3362 			default:
3363 				gb_tile_moden = 0;
3364 				break;
3365 			}
3366 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3367 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3368 		}
3369 	} else
3370 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3371 }
3372 
3373 /**
3374  * cik_select_se_sh - select which SE, SH to address
3375  *
3376  * @rdev: radeon_device pointer
3377  * @se_num: shader engine to address
3378  * @sh_num: sh block to address
3379  *
3380  * Select which SE, SH combinations to address. Certain
3381  * registers are instanced per SE or SH.  0xffffffff means
3382  * broadcast to all SEs or SHs (CIK).
3383  */
3384 static void cik_select_se_sh(struct radeon_device *rdev,
3385 			     u32 se_num, u32 sh_num)
3386 {
3387 	u32 data = INSTANCE_BROADCAST_WRITES;
3388 
3389 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3390 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3391 	else if (se_num == 0xffffffff)
3392 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3393 	else if (sh_num == 0xffffffff)
3394 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3395 	else
3396 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3397 	WREG32(GRBM_GFX_INDEX, data);
3398 }
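
/*
 * Usage sketch (illustrative, not driver code): an instanced register is
 * typically accessed by selecting one SE with SH broadcast and then
 * restoring full broadcast so that later writes reach every instance:
 *
 *	mutex_lock(&rdev->grbm_idx_mutex);
 *	cik_select_se_sh(rdev, se, 0xffffffff);
 *	... RREG32()/WREG32() on the instanced register ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *	mutex_unlock(&rdev->grbm_idx_mutex);
 *
 * cik_setup_rb() below follows exactly this pattern.
 */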
3399 
3400 /**
3401  * cik_create_bitmask - create a bitmask
3402  *
3403  * @bit_width: length of the mask
3404  *
3405  * Create a variable length bit mask (CIK).
3406  * Returns the bitmask.
3407  */
3408 static u32 cik_create_bitmask(u32 bit_width)
3409 {
3410 	u32 i, mask = 0;
3411 
3412 	for (i = 0; i < bit_width; i++) {
3413 		mask <<= 1;
3414 		mask |= 1;
3415 	}
3416 	return mask;
3417 }
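
/*
 * Illustrative note: the loop above computes the same value as
 * ((1 << bit_width) - 1), e.g. 0x3 for bit_width = 2 and 0xf for
 * bit_width = 4, but it also stays well defined for bit_width = 0 (mask 0)
 * and bit_width = 32 (mask 0xffffffff), where a plain shift would not.
 */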
3418 
3419 /**
3420  * cik_get_rb_disabled - computes the mask of disabled RBs
3421  *
3422  * @rdev: radeon_device pointer
3423  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3424  * @sh_per_se: number of SH blocks per SE for the asic
3426  *
3427  * Calculates the bitmask of disabled RBs (CIK).
3428  * Returns the disabled RB bitmask.
3429  */
3430 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3431 			      u32 max_rb_num_per_se,
3432 			      u32 sh_per_se)
3433 {
3434 	u32 data, mask;
3435 
3436 	data = RREG32(CC_RB_BACKEND_DISABLE);
3437 	if (data & 1)
3438 		data &= BACKEND_DISABLE_MASK;
3439 	else
3440 		data = 0;
3441 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3442 
3443 	data >>= BACKEND_DISABLE_SHIFT;
3444 
3445 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3446 
3447 	return data & mask;
3448 }
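
/*
 * Worked example (illustrative): on a part with max_rb_num_per_se = 2 and
 * sh_per_se = 1, mask = cik_create_bitmask(2) = 0x3, so a fused-off RB1
 * (disable field 0x2 after the shift above) yields a return value of 0x2.
 */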
3449 
3450 /**
3451  * cik_setup_rb - setup the RBs on the asic
3452  *
3453  * @rdev: radeon_device pointer
3454  * @se_num: number of SEs (shader engines) for the asic
3455  * @sh_per_se: number of SH blocks per SE for the asic
3456  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3457  *
3458  * Configures per-SE/SH RB registers (CIK).
3459  */
3460 static void cik_setup_rb(struct radeon_device *rdev,
3461 			 u32 se_num, u32 sh_per_se,
3462 			 u32 max_rb_num_per_se)
3463 {
3464 	int i, j;
3465 	u32 data, mask;
3466 	u32 disabled_rbs = 0;
3467 	u32 enabled_rbs = 0;
3468 
3469 	mutex_lock(&rdev->grbm_idx_mutex);
3470 	for (i = 0; i < se_num; i++) {
3471 		for (j = 0; j < sh_per_se; j++) {
3472 			cik_select_se_sh(rdev, i, j);
3473 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3474 			if (rdev->family == CHIP_HAWAII)
3475 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3476 			else
3477 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3478 		}
3479 	}
3480 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3481 	mutex_unlock(&rdev->grbm_idx_mutex);
3482 
3483 	mask = 1;
3484 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3485 		if (!(disabled_rbs & mask))
3486 			enabled_rbs |= mask;
3487 		mask <<= 1;
3488 	}
3489 
3490 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3491 
3492 	mutex_lock(&rdev->grbm_idx_mutex);
3493 	for (i = 0; i < se_num; i++) {
3494 		cik_select_se_sh(rdev, i, 0xffffffff);
3495 		data = 0;
3496 		for (j = 0; j < sh_per_se; j++) {
3497 			switch (enabled_rbs & 3) {
3498 			case 0:
3499 				if (j == 0)
3500 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3501 				else
3502 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3503 				break;
3504 			case 1:
3505 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3506 				break;
3507 			case 2:
3508 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3509 				break;
3510 			case 3:
3511 			default:
3512 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3513 				break;
3514 			}
3515 			enabled_rbs >>= 2;
3516 		}
3517 		WREG32(PA_SC_RASTER_CONFIG, data);
3518 	}
3519 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3520 	mutex_unlock(&rdev->grbm_idx_mutex);
3521 }
3522 
3523 /**
3524  * cik_gpu_init - setup the 3D engine
3525  *
3526  * @rdev: radeon_device pointer
3527  *
3528  * Configures the 3D engine and tiling configuration
3529  * registers so that the 3D engine is usable.
3530  */
3531 static void cik_gpu_init(struct radeon_device *rdev)
3532 {
3533 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3534 	u32 mc_shared_chmap, mc_arb_ramcfg;
3535 	u32 hdp_host_path_cntl;
3536 	u32 tmp;
3537 	int i, j;
3538 
3539 	switch (rdev->family) {
3540 	case CHIP_BONAIRE:
3541 		rdev->config.cik.max_shader_engines = 2;
3542 		rdev->config.cik.max_tile_pipes = 4;
3543 		rdev->config.cik.max_cu_per_sh = 7;
3544 		rdev->config.cik.max_sh_per_se = 1;
3545 		rdev->config.cik.max_backends_per_se = 2;
3546 		rdev->config.cik.max_texture_channel_caches = 4;
3547 		rdev->config.cik.max_gprs = 256;
3548 		rdev->config.cik.max_gs_threads = 32;
3549 		rdev->config.cik.max_hw_contexts = 8;
3550 
3551 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3552 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3553 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3554 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3555 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3556 		break;
3557 	case CHIP_HAWAII:
3558 		rdev->config.cik.max_shader_engines = 4;
3559 		rdev->config.cik.max_tile_pipes = 16;
3560 		rdev->config.cik.max_cu_per_sh = 11;
3561 		rdev->config.cik.max_sh_per_se = 1;
3562 		rdev->config.cik.max_backends_per_se = 4;
3563 		rdev->config.cik.max_texture_channel_caches = 16;
3564 		rdev->config.cik.max_gprs = 256;
3565 		rdev->config.cik.max_gs_threads = 32;
3566 		rdev->config.cik.max_hw_contexts = 8;
3567 
3568 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3569 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3570 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3571 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3572 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3573 		break;
3574 	case CHIP_KAVERI:
3575 		rdev->config.cik.max_shader_engines = 1;
3576 		rdev->config.cik.max_tile_pipes = 4;
3577 		if ((rdev->pdev->device == 0x1304) ||
3578 		    (rdev->pdev->device == 0x1305) ||
3579 		    (rdev->pdev->device == 0x130C) ||
3580 		    (rdev->pdev->device == 0x130F) ||
3581 		    (rdev->pdev->device == 0x1310) ||
3582 		    (rdev->pdev->device == 0x1311) ||
3583 		    (rdev->pdev->device == 0x131C)) {
3584 			rdev->config.cik.max_cu_per_sh = 8;
3585 			rdev->config.cik.max_backends_per_se = 2;
3586 		} else if ((rdev->pdev->device == 0x1309) ||
3587 			   (rdev->pdev->device == 0x130A) ||
3588 			   (rdev->pdev->device == 0x130D) ||
3589 			   (rdev->pdev->device == 0x1313) ||
3590 			   (rdev->pdev->device == 0x131D)) {
3591 			rdev->config.cik.max_cu_per_sh = 6;
3592 			rdev->config.cik.max_backends_per_se = 2;
3593 		} else if ((rdev->pdev->device == 0x1306) ||
3594 			   (rdev->pdev->device == 0x1307) ||
3595 			   (rdev->pdev->device == 0x130B) ||
3596 			   (rdev->pdev->device == 0x130E) ||
3597 			   (rdev->pdev->device == 0x1315) ||
3598 			   (rdev->pdev->device == 0x1318) ||
3599 			   (rdev->pdev->device == 0x131B)) {
3600 			rdev->config.cik.max_cu_per_sh = 4;
3601 			rdev->config.cik.max_backends_per_se = 1;
3602 		} else {
3603 			rdev->config.cik.max_cu_per_sh = 3;
3604 			rdev->config.cik.max_backends_per_se = 1;
3605 		}
3606 		rdev->config.cik.max_sh_per_se = 1;
3607 		rdev->config.cik.max_texture_channel_caches = 4;
3608 		rdev->config.cik.max_gprs = 256;
3609 		rdev->config.cik.max_gs_threads = 16;
3610 		rdev->config.cik.max_hw_contexts = 8;
3611 
3612 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3613 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3614 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3615 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3616 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3617 		break;
3618 	case CHIP_KABINI:
3619 	case CHIP_MULLINS:
3620 	default:
3621 		rdev->config.cik.max_shader_engines = 1;
3622 		rdev->config.cik.max_tile_pipes = 2;
3623 		rdev->config.cik.max_cu_per_sh = 2;
3624 		rdev->config.cik.max_sh_per_se = 1;
3625 		rdev->config.cik.max_backends_per_se = 1;
3626 		rdev->config.cik.max_texture_channel_caches = 2;
3627 		rdev->config.cik.max_gprs = 256;
3628 		rdev->config.cik.max_gs_threads = 16;
3629 		rdev->config.cik.max_hw_contexts = 8;
3630 
3631 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3632 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3633 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3634 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3635 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3636 		break;
3637 	}
3638 
3639 	/* Initialize HDP */
3640 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3641 		WREG32((0x2c14 + j), 0x00000000);
3642 		WREG32((0x2c18 + j), 0x00000000);
3643 		WREG32((0x2c1c + j), 0x00000000);
3644 		WREG32((0x2c20 + j), 0x00000000);
3645 		WREG32((0x2c24 + j), 0x00000000);
3646 	}
3647 
3648 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3649 	WREG32(SRBM_INT_CNTL, 0x1);
3650 	WREG32(SRBM_INT_ACK, 0x1);
3651 
3652 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3653 
3654 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3655 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3656 
3657 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3658 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3659 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3660 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3661 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3662 		rdev->config.cik.mem_row_size_in_kb = 4;
3663 	/* XXX use MC settings? */
3664 	rdev->config.cik.shader_engine_tile_size = 32;
3665 	rdev->config.cik.num_gpus = 1;
3666 	rdev->config.cik.multi_gpu_tile_size = 64;
3667 
3668 	/* fix up row size */
3669 	gb_addr_config &= ~ROW_SIZE_MASK;
3670 	switch (rdev->config.cik.mem_row_size_in_kb) {
3671 	case 1:
3672 	default:
3673 		gb_addr_config |= ROW_SIZE(0);
3674 		break;
3675 	case 2:
3676 		gb_addr_config |= ROW_SIZE(1);
3677 		break;
3678 	case 4:
3679 		gb_addr_config |= ROW_SIZE(2);
3680 		break;
3681 	}
3682 
3683 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3684 	 * not have bank info, so create a custom tiling dword.
3685 	 * bits 3:0   num_pipes
3686 	 * bits 7:4   num_banks
3687 	 * bits 11:8  group_size
3688 	 * bits 15:12 row_size
3689 	 */
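	/*
	 * Worked example (illustrative): with num_tile_pipes = 4 the switch
	 * below stores 2 in bits 3:0; the raw NOOFBANK, PIPE_INTERLEAVE_SIZE
	 * and ROW_SIZE fields are then packed into bits 7:4, 11:8 and 15:12,
	 * so userspace can recover the whole tiling setup from one dword.
	 */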
3690 	rdev->config.cik.tile_config = 0;
3691 	switch (rdev->config.cik.num_tile_pipes) {
3692 	case 1:
3693 		rdev->config.cik.tile_config |= (0 << 0);
3694 		break;
3695 	case 2:
3696 		rdev->config.cik.tile_config |= (1 << 0);
3697 		break;
3698 	case 4:
3699 		rdev->config.cik.tile_config |= (2 << 0);
3700 		break;
3701 	case 8:
3702 	default:
3703 		/* XXX what about 12? */
3704 		rdev->config.cik.tile_config |= (3 << 0);
3705 		break;
3706 	}
3707 	rdev->config.cik.tile_config |=
3708 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3709 	rdev->config.cik.tile_config |=
3710 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3711 	rdev->config.cik.tile_config |=
3712 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3713 
3714 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3715 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3716 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3717 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3718 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3719 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3720 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3721 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3722 
3723 	cik_tiling_mode_table_init(rdev);
3724 
3725 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3726 		     rdev->config.cik.max_sh_per_se,
3727 		     rdev->config.cik.max_backends_per_se);
3728 
3729 	rdev->config.cik.active_cus = 0;
3730 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3731 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3732 			rdev->config.cik.active_cus +=
3733 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3734 		}
3735 	}
3736 
3737 	/* set HW defaults for 3D engine */
3738 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3739 
3740 	mutex_lock(&rdev->grbm_idx_mutex);
3741 	/*
3742 	 * making sure that the following register writes will be broadcast
3743 	 * to all the shaders
3744 	 */
3745 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3746 	WREG32(SX_DEBUG_1, 0x20);
3747 
3748 	WREG32(TA_CNTL_AUX, 0x00010000);
3749 
3750 	tmp = RREG32(SPI_CONFIG_CNTL);
3751 	tmp |= 0x03000000;
3752 	WREG32(SPI_CONFIG_CNTL, tmp);
3753 
3754 	WREG32(SQ_CONFIG, 1);
3755 
3756 	WREG32(DB_DEBUG, 0);
3757 
3758 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3759 	tmp |= 0x00000400;
3760 	WREG32(DB_DEBUG2, tmp);
3761 
3762 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3763 	tmp |= 0x00020200;
3764 	WREG32(DB_DEBUG3, tmp);
3765 
3766 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3767 	tmp |= 0x00018208;
3768 	WREG32(CB_HW_CONTROL, tmp);
3769 
3770 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3771 
3772 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3773 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3774 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3775 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3776 
3777 	WREG32(VGT_NUM_INSTANCES, 1);
3778 
3779 	WREG32(CP_PERFMON_CNTL, 0);
3780 
3781 	WREG32(SQ_CONFIG, 0);
3782 
3783 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3784 					  FORCE_EOV_MAX_REZ_CNT(255)));
3785 
3786 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3787 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3788 
3789 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3790 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3791 
3792 	tmp = RREG32(HDP_MISC_CNTL);
3793 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3794 	WREG32(HDP_MISC_CNTL, tmp);
3795 
3796 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3797 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3798 
3799 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3800 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3801 	mutex_unlock(&rdev->grbm_idx_mutex);
3802 
3803 	udelay(50);
3804 }
3805 
3806 /*
3807  * GPU scratch register helper functions.
3808  */
3809 /**
3810  * cik_scratch_init - setup driver info for CP scratch regs
3811  *
3812  * @rdev: radeon_device pointer
3813  *
3814  * Set up the number and offset of the CP scratch registers.
3815  * NOTE: use of CP scratch registers is a legacy interface and
3816  * is not used by default on newer asics (r6xx+).  On newer asics,
3817  * memory buffers are used for fences rather than scratch regs.
3818  */
3819 static void cik_scratch_init(struct radeon_device *rdev)
3820 {
3821 	int i;
3822 
3823 	rdev->scratch.num_reg = 7;
3824 	rdev->scratch.reg_base = SCRATCH_REG0;
3825 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3826 		rdev->scratch.free[i] = true;
3827 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3828 	}
3829 }
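
/*
 * Illustrative note: with num_reg = 7 and reg_base = SCRATCH_REG0, the
 * usable registers are SCRATCH_REG0 + 0x00 through SCRATCH_REG0 + 0x18 in
 * 4-byte steps; radeon_scratch_get()/radeon_scratch_free() hand them out,
 * as the ring and IB tests below demonstrate.
 */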
3830 
3831 /**
3832  * cik_ring_test - basic gfx ring test
3833  *
3834  * @rdev: radeon_device pointer
3835  * @ring: radeon_ring structure holding ring information
3836  *
3837  * Allocate a scratch register and write to it using the gfx ring (CIK).
3838  * Provides a basic gfx ring test to verify that the ring is working.
3839  * Used by cik_cp_gfx_resume().
3840  * Returns 0 on success, error on failure.
3841  */
3842 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3843 {
3844 	uint32_t scratch;
3845 	uint32_t tmp = 0;
3846 	unsigned i;
3847 	int r;
3848 
3849 	r = radeon_scratch_get(rdev, &scratch);
3850 	if (r) {
3851 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3852 		return r;
3853 	}
3854 	WREG32(scratch, 0xCAFEDEAD);
3855 	r = radeon_ring_lock(rdev, ring, 3);
3856 	if (r) {
3857 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3858 		radeon_scratch_free(rdev, scratch);
3859 		return r;
3860 	}
3861 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3862 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3863 	radeon_ring_write(ring, 0xDEADBEEF);
3864 	radeon_ring_unlock_commit(rdev, ring, false);
3865 
3866 	for (i = 0; i < rdev->usec_timeout; i++) {
3867 		tmp = RREG32(scratch);
3868 		if (tmp == 0xDEADBEEF)
3869 			break;
3870 		DRM_UDELAY(1);
3871 	}
3872 	if (i < rdev->usec_timeout) {
3873 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3874 	} else {
3875 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3876 			  ring->idx, scratch, tmp);
3877 		r = -EINVAL;
3878 	}
3879 	radeon_scratch_free(rdev, scratch);
3880 	return r;
3881 }
3882 
3883 /**
3884  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3885  *
3886  * @rdev: radeon_device pointer
3887  * @ridx: radeon ring index
3888  *
3889  * Emits an HDP flush on the CP.
3890  */
3891 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3892 				       int ridx)
3893 {
3894 	struct radeon_ring *ring = &rdev->ring[ridx];
3895 	u32 ref_and_mask;
3896 
3897 	switch (ring->idx) {
3898 	case CAYMAN_RING_TYPE_CP1_INDEX:
3899 	case CAYMAN_RING_TYPE_CP2_INDEX:
3900 	default:
3901 		switch (ring->me) {
3902 		case 0:
3903 			ref_and_mask = CP2 << ring->pipe;
3904 			break;
3905 		case 1:
3906 			ref_and_mask = CP6 << ring->pipe;
3907 			break;
3908 		default:
3909 			return;
3910 		}
3911 		break;
3912 	case RADEON_RING_TYPE_GFX_INDEX:
3913 		ref_and_mask = CP0;
3914 		break;
3915 	}
3916 
3917 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3918 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3919 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3920 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3921 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3922 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3923 	radeon_ring_write(ring, ref_and_mask);
3924 	radeon_ring_write(ring, ref_and_mask);
3925 	radeon_ring_write(ring, 0x20); /* poll interval */
3926 }
3927 
3928 /**
3929  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3930  *
3931  * @rdev: radeon_device pointer
3932  * @fence: radeon fence object
3933  *
3934  * Emits a fence sequence number on the gfx ring and flushes
3935  * GPU caches.
3936  */
3937 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3938 			     struct radeon_fence *fence)
3939 {
3940 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3941 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3942 
3943 	/* Workaround for cache flush problems. First send a dummy EOP
3944 	 * event down the pipe with a sequence number one below the real one.
3945 	 */
3946 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3947 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3948 				 EOP_TC_ACTION_EN |
3949 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3950 				 EVENT_INDEX(5)));
3951 	radeon_ring_write(ring, addr & 0xfffffffc);
3952 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3953 				DATA_SEL(1) | INT_SEL(0));
3954 	radeon_ring_write(ring, fence->seq - 1);
3955 	radeon_ring_write(ring, 0);
3956 
3957 	/* Then send the real EOP event down the pipe. */
3958 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3959 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3960 				 EOP_TC_ACTION_EN |
3961 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3962 				 EVENT_INDEX(5)));
3963 	radeon_ring_write(ring, addr & 0xfffffffc);
3964 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3965 	radeon_ring_write(ring, fence->seq);
3966 	radeon_ring_write(ring, 0);
3967 }
3968 
3969 /**
3970  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3971  *
3972  * @rdev: radeon_device pointer
3973  * @fence: radeon fence object
3974  *
3975  * Emits a fence sequence number on the compute ring and flushes
3976  * GPU caches.
3977  */
3978 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3979 				 struct radeon_fence *fence)
3980 {
3981 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3982 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3983 
3984 	/* RELEASE_MEM - flush caches, send int */
3985 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3986 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3987 				 EOP_TC_ACTION_EN |
3988 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3989 				 EVENT_INDEX(5)));
3990 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3991 	radeon_ring_write(ring, addr & 0xfffffffc);
3992 	radeon_ring_write(ring, upper_32_bits(addr));
3993 	radeon_ring_write(ring, fence->seq);
3994 	radeon_ring_write(ring, 0);
3995 }
3996 
3997 /**
3998  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3999  *
4000  * @rdev: radeon_device pointer
4001  * @ring: radeon ring buffer object
4002  * @semaphore: radeon semaphore object
4003  * @emit_wait: Is this a semaphore wait?
4004  *
4005  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4006  * from running ahead of semaphore waits.
4007  */
4008 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4009 			     struct radeon_ring *ring,
4010 			     struct radeon_semaphore *semaphore,
4011 			     bool emit_wait)
4012 {
4013 	uint64_t addr = semaphore->gpu_addr;
4014 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4015 
4016 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4017 	radeon_ring_write(ring, lower_32_bits(addr));
4018 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4019 
4020 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4021 		/* Prevent the PFP from running ahead of the semaphore wait */
4022 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4023 		radeon_ring_write(ring, 0x0);
4024 	}
4025 
4026 	return true;
4027 }
4028 
4029 /**
4030  * cik_copy_cpdma - copy pages using the CP DMA engine
4031  *
4032  * @rdev: radeon_device pointer
4033  * @src_offset: src GPU address
4034  * @dst_offset: dst GPU address
4035  * @num_gpu_pages: number of GPU pages to xfer
4036  * @resv: reservation object to sync to
4037  *
4038  * Copy GPU pages using the CP DMA engine (CIK+).
4039  * Used by the radeon ttm implementation to move pages if
4040  * registered as the asic copy callback.
4041  */
4042 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4043 				    uint64_t src_offset, uint64_t dst_offset,
4044 				    unsigned num_gpu_pages,
4045 				    struct reservation_object *resv)
4046 {
4047 	struct radeon_fence *fence;
4048 	struct radeon_sync sync;
4049 	int ring_index = rdev->asic->copy.blit_ring_index;
4050 	struct radeon_ring *ring = &rdev->ring[ring_index];
4051 	u32 size_in_bytes, cur_size_in_bytes, control;
4052 	int i, num_loops;
4053 	int r = 0;
4054 
4055 	radeon_sync_create(&sync);
4056 
4057 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4058 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4059 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4060 	if (r) {
4061 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4062 		radeon_sync_free(rdev, &sync, NULL);
4063 		return ERR_PTR(r);
4064 	}
4065 
4066 	radeon_sync_resv(rdev, &sync, resv, false);
4067 	radeon_sync_rings(rdev, &sync, ring->idx);
4068 
4069 	for (i = 0; i < num_loops; i++) {
4070 		cur_size_in_bytes = size_in_bytes;
4071 		if (cur_size_in_bytes > 0x1fffff)
4072 			cur_size_in_bytes = 0x1fffff;
4073 		size_in_bytes -= cur_size_in_bytes;
4074 		control = 0;
4075 		if (size_in_bytes == 0)
4076 			control |= PACKET3_DMA_DATA_CP_SYNC;
4077 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4078 		radeon_ring_write(ring, control);
4079 		radeon_ring_write(ring, lower_32_bits(src_offset));
4080 		radeon_ring_write(ring, upper_32_bits(src_offset));
4081 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4082 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4083 		radeon_ring_write(ring, cur_size_in_bytes);
4084 		src_offset += cur_size_in_bytes;
4085 		dst_offset += cur_size_in_bytes;
4086 	}
4087 
4088 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4089 	if (r) {
4090 		radeon_ring_unlock_undo(rdev, ring);
4091 		radeon_sync_free(rdev, &sync, NULL);
4092 		return ERR_PTR(r);
4093 	}
4094 
4095 	radeon_ring_unlock_commit(rdev, ring, false);
4096 	radeon_sync_free(rdev, &sync, fence);
4097 
4098 	return fence;
4099 }
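
/*
 * Worked example (illustrative): copying 1024 GPU pages (4 MiB with 4KB
 * pages) gives size_in_bytes = 0x400000, so num_loops =
 * DIV_ROUND_UP(0x400000, 0x1fffff) = 3 DMA_DATA packets (0x1fffff +
 * 0x1fffff + 2 bytes) and a ring lock of 3 * 7 + 18 = 39 dwords; only the
 * last packet sets PACKET3_DMA_DATA_CP_SYNC.
 */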
4100 
4101 /*
4102  * IB stuff
4103  */
4104 /**
4105  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4106  *
4107  * @rdev: radeon_device pointer
4108  * @ib: radeon indirect buffer object
4109  *
4110  * Emits a DE (drawing engine) or CE (constant engine) IB
4111  * on the gfx ring.  IBs are usually generated by userspace
4112  * acceleration drivers and submitted to the kernel for
4113  * scheduling on the ring.  This function schedules the IB
4114  * on the gfx ring for execution by the GPU.
4115  */
4116 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4117 {
4118 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4119 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4120 	u32 header, control = INDIRECT_BUFFER_VALID;
4121 
4122 	if (ib->is_const_ib) {
4123 		/* set switch buffer packet before const IB */
4124 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4125 		radeon_ring_write(ring, 0);
4126 
4127 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4128 	} else {
4129 		u32 next_rptr;
4130 		if (ring->rptr_save_reg) {
4131 			next_rptr = ring->wptr + 3 + 4;
4132 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4133 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4134 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4135 			radeon_ring_write(ring, next_rptr);
4136 		} else if (rdev->wb.enabled) {
4137 			next_rptr = ring->wptr + 5 + 4;
4138 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4139 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4140 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4141 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4142 			radeon_ring_write(ring, next_rptr);
4143 		}
4144 
4145 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4146 	}
4147 
4148 	control |= ib->length_dw | (vm_id << 24);
4149 
4150 	radeon_ring_write(ring, header);
4151 	radeon_ring_write(ring,
4152 #ifdef __BIG_ENDIAN
4153 			  (2 << 0) |
4154 #endif
4155 			  (ib->gpu_addr & 0xFFFFFFFC));
4156 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4157 	radeon_ring_write(ring, control);
4158 }
4159 
4160 /**
4161  * cik_ib_test - basic gfx ring IB test
4162  *
4163  * @rdev: radeon_device pointer
4164  * @ring: radeon_ring structure holding ring information
4165  *
4166  * Allocate an IB and execute it on the gfx ring (CIK).
4167  * Provides a basic gfx ring test to verify that IBs are working.
4168  * Returns 0 on success, error on failure.
4169  */
4170 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4171 {
4172 	struct radeon_ib ib;
4173 	uint32_t scratch;
4174 	uint32_t tmp = 0;
4175 	unsigned i;
4176 	int r;
4177 
4178 	r = radeon_scratch_get(rdev, &scratch);
4179 	if (r) {
4180 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4181 		return r;
4182 	}
4183 	WREG32(scratch, 0xCAFEDEAD);
4184 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4185 	if (r) {
4186 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4187 		radeon_scratch_free(rdev, scratch);
4188 		return r;
4189 	}
4190 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4191 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4192 	ib.ptr[2] = 0xDEADBEEF;
4193 	ib.length_dw = 3;
4194 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4195 	if (r) {
4196 		radeon_scratch_free(rdev, scratch);
4197 		radeon_ib_free(rdev, &ib);
4198 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4199 		return r;
4200 	}
4201 	r = radeon_fence_wait(ib.fence, false);
4202 	if (r) {
4203 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4204 		radeon_scratch_free(rdev, scratch);
4205 		radeon_ib_free(rdev, &ib);
4206 		return r;
4207 	}
4208 	for (i = 0; i < rdev->usec_timeout; i++) {
4209 		tmp = RREG32(scratch);
4210 		if (tmp == 0xDEADBEEF)
4211 			break;
4212 		DRM_UDELAY(1);
4213 	}
4214 	if (i < rdev->usec_timeout) {
4215 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4216 	} else {
4217 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4218 			  scratch, tmp);
4219 		r = -EINVAL;
4220 	}
4221 	radeon_scratch_free(rdev, scratch);
4222 	radeon_ib_free(rdev, &ib);
4223 	return r;
4224 }
4225 
4226 /*
4227  * CP.
4228  * On CIK, gfx and compute now have independent command processors.
4229  *
4230  * GFX
4231  * Gfx consists of a single ring and can process both gfx jobs and
4232  * compute jobs.  The gfx CP consists of three microengines (ME):
4233  * PFP - Pre-Fetch Parser
4234  * ME - Micro Engine
4235  * CE - Constant Engine
4236  * The PFP and ME make up what is considered the Drawing Engine (DE).
4237  * The CE is an asynchronous engine used for updating buffer descriptors
4238  * used by the DE so that they can be loaded into cache in parallel
4239  * while the DE is processing state update packets.
4240  *
4241  * Compute
4242  * The compute CP consists of two microengines (ME):
4243  * MEC1 - Compute MicroEngine 1
4244  * MEC2 - Compute MicroEngine 2
4245  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4246  * The queues are exposed to userspace and are programmed directly
4247  * by the compute runtime.
4248  */
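
/*
 * Illustrative sketch (not driver code): under the 4-pipes/8-queues layout
 * described above, a flat compute queue index could be decomposed as
 *
 *	queue = idx % 8;	// 8 queues per pipe
 *	pipe  = (idx / 8) % 4;	// 4 pipes per MEC
 *	mec   = idx / 32;	// 32 queues per MEC
 *
 * The flat index is an assumption for the example; the driver carries
 * me/pipe/queue explicitly in struct radeon_ring.
 */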
4249 /**
4250  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4251  *
4252  * @rdev: radeon_device pointer
4253  * @enable: enable or disable the MEs
4254  *
4255  * Halts or unhalts the gfx MEs.
4256  */
4257 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4258 {
4259 	if (enable)
4260 		WREG32(CP_ME_CNTL, 0);
4261 	else {
4262 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4263 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4264 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4265 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4266 	}
4267 	udelay(50);
4268 }
4269 
4270 /**
4271  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4272  *
4273  * @rdev: radeon_device pointer
4274  *
4275  * Loads the gfx PFP, ME, and CE ucode.
4276  * Returns 0 for success, -EINVAL if the ucode is not available.
4277  */
4278 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4279 {
4280 	int i;
4281 
4282 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4283 		return -EINVAL;
4284 
4285 	cik_cp_gfx_enable(rdev, false);
4286 
4287 	if (rdev->new_fw) {
4288 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4289 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4290 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4291 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4292 		const struct gfx_firmware_header_v1_0 *me_hdr =
4293 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4294 		const __le32 *fw_data;
4295 		u32 fw_size;
4296 
4297 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4298 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4299 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4300 
4301 		/* PFP */
4302 		fw_data = (const __le32 *)
4303 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4304 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4305 		WREG32(CP_PFP_UCODE_ADDR, 0);
4306 		for (i = 0; i < fw_size; i++)
4307 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4308 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4309 
4310 		/* CE */
4311 		fw_data = (const __le32 *)
4312 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4313 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4314 		WREG32(CP_CE_UCODE_ADDR, 0);
4315 		for (i = 0; i < fw_size; i++)
4316 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4317 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4318 
4319 		/* ME */
4320 		fw_data = (const __le32 *)
4321 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4322 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4323 		WREG32(CP_ME_RAM_WADDR, 0);
4324 		for (i = 0; i < fw_size; i++)
4325 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4326 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4327 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4328 	} else {
4329 		const __be32 *fw_data;
4330 
4331 		/* PFP */
4332 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4333 		WREG32(CP_PFP_UCODE_ADDR, 0);
4334 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4335 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4336 		WREG32(CP_PFP_UCODE_ADDR, 0);
4337 
4338 		/* CE */
4339 		fw_data = (const __be32 *)rdev->ce_fw->data;
4340 		WREG32(CP_CE_UCODE_ADDR, 0);
4341 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4342 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4343 		WREG32(CP_CE_UCODE_ADDR, 0);
4344 
4345 		/* ME */
4346 		fw_data = (const __be32 *)rdev->me_fw->data;
4347 		WREG32(CP_ME_RAM_WADDR, 0);
4348 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4349 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4350 		WREG32(CP_ME_RAM_WADDR, 0);
4351 	}
4352 
4353 	return 0;
4354 }
4355 
4356 /**
4357  * cik_cp_gfx_start - start the gfx ring
4358  *
4359  * @rdev: radeon_device pointer
4360  *
4361  * Enables the ring and loads the clear state context and other
4362  * packets required to init the ring.
4363  * Returns 0 for success, error for failure.
4364  */
4365 static int cik_cp_gfx_start(struct radeon_device *rdev)
4366 {
4367 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4368 	int r, i;
4369 
4370 	/* init the CP */
4371 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4372 	WREG32(CP_ENDIAN_SWAP, 0);
4373 	WREG32(CP_DEVICE_ID, 1);
4374 
4375 	cik_cp_gfx_enable(rdev, true);
4376 
4377 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4378 	if (r) {
4379 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4380 		return r;
4381 	}
4382 
4383 	/* init the CE partitions.  CE only used for gfx on CIK */
4384 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4385 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4386 	radeon_ring_write(ring, 0x8000);
4387 	radeon_ring_write(ring, 0x8000);
4388 
4389 	/* setup clear context state */
4390 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4391 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4392 
4393 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4394 	radeon_ring_write(ring, 0x80000000);
4395 	radeon_ring_write(ring, 0x80000000);
4396 
4397 	for (i = 0; i < cik_default_size; i++)
4398 		radeon_ring_write(ring, cik_default_state[i]);
4399 
4400 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4401 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4402 
4403 	/* set clear context state */
4404 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4405 	radeon_ring_write(ring, 0);
4406 
4407 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4408 	radeon_ring_write(ring, 0x00000316);
4409 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4410 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4411 
4412 	radeon_ring_unlock_commit(rdev, ring, false);
4413 
4414 	return 0;
4415 }
4416 
4417 /**
4418  * cik_cp_gfx_fini - stop the gfx ring
4419  *
4420  * @rdev: radeon_device pointer
4421  *
4422  * Stop the gfx ring and tear down the driver ring
4423  * info.
4424  */
4425 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4426 {
4427 	cik_cp_gfx_enable(rdev, false);
4428 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4429 }
4430 
4431 /**
4432  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4433  *
4434  * @rdev: radeon_device pointer
4435  *
4436  * Program the location and size of the gfx ring buffer
4437  * and test it to make sure it's working.
4438  * Returns 0 for success, error for failure.
4439  */
4440 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4441 {
4442 	struct radeon_ring *ring;
4443 	u32 tmp;
4444 	u32 rb_bufsz;
4445 	u64 rb_addr;
4446 	int r;
4447 
4448 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4449 	if (rdev->family != CHIP_HAWAII)
4450 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4451 
4452 	/* Set the write pointer delay */
4453 	WREG32(CP_RB_WPTR_DELAY, 0);
4454 
4455 	/* set the RB to use vmid 0 */
4456 	WREG32(CP_RB_VMID, 0);
4457 
4458 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4459 
4460 	/* ring 0 - compute and gfx */
4461 	/* Set ring buffer size */
4462 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4463 	rb_bufsz = order_base_2(ring->ring_size / 8);
4464 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4465 #ifdef __BIG_ENDIAN
4466 	tmp |= BUF_SWAP_32BIT;
4467 #endif
4468 	WREG32(CP_RB0_CNTL, tmp);
4469 
4470 	/* Initialize the ring buffer's read and write pointers */
4471 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4472 	ring->wptr = 0;
4473 	WREG32(CP_RB0_WPTR, ring->wptr);
4474 
4475 	/* set the wb address whether it's enabled or not */
4476 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4477 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4478 
4479 	/* scratch register shadowing is no longer supported */
4480 	WREG32(SCRATCH_UMSK, 0);
4481 
4482 	if (!rdev->wb.enabled)
4483 		tmp |= RB_NO_UPDATE;
4484 
4485 	mdelay(1);
4486 	WREG32(CP_RB0_CNTL, tmp);
4487 
4488 	rb_addr = ring->gpu_addr >> 8;
4489 	WREG32(CP_RB0_BASE, rb_addr);
4490 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4491 
4492 	/* start the ring */
4493 	cik_cp_gfx_start(rdev);
4494 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4495 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4496 	if (r) {
4497 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4498 		return r;
4499 	}
4500 
4501 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4502 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4503 
4504 	return 0;
4505 }
4506 
4507 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4508 		     struct radeon_ring *ring)
4509 {
4510 	u32 rptr;
4511 
4512 	if (rdev->wb.enabled)
4513 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4514 	else
4515 		rptr = RREG32(CP_RB0_RPTR);
4516 
4517 	return rptr;
4518 }
4519 
4520 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4521 		     struct radeon_ring *ring)
4522 {
4523 	u32 wptr;
4524 
4525 	wptr = RREG32(CP_RB0_WPTR);
4526 
4527 	return wptr;
4528 }
4529 
4530 void cik_gfx_set_wptr(struct radeon_device *rdev,
4531 		      struct radeon_ring *ring)
4532 {
4533 	WREG32(CP_RB0_WPTR, ring->wptr);
4534 	(void)RREG32(CP_RB0_WPTR);
4535 }
4536 
4537 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4538 			 struct radeon_ring *ring)
4539 {
4540 	u32 rptr;
4541 
4542 	if (rdev->wb.enabled) {
4543 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4544 	} else {
4545 		mutex_lock(&rdev->srbm_mutex);
4546 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4547 		rptr = RREG32(CP_HQD_PQ_RPTR);
4548 		cik_srbm_select(rdev, 0, 0, 0, 0);
4549 		mutex_unlock(&rdev->srbm_mutex);
4550 	}
4551 
4552 	return rptr;
4553 }
4554 
4555 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4556 			 struct radeon_ring *ring)
4557 {
4558 	u32 wptr;
4559 
4560 	if (rdev->wb.enabled) {
4561 		/* XXX check if swapping is necessary on BE */
4562 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4563 	} else {
4564 		mutex_lock(&rdev->srbm_mutex);
4565 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4566 		wptr = RREG32(CP_HQD_PQ_WPTR);
4567 		cik_srbm_select(rdev, 0, 0, 0, 0);
4568 		mutex_unlock(&rdev->srbm_mutex);
4569 	}
4570 
4571 	return wptr;
4572 }
4573 
4574 void cik_compute_set_wptr(struct radeon_device *rdev,
4575 			  struct radeon_ring *ring)
4576 {
4577 	/* XXX check if swapping is necessary on BE */
4578 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4579 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4580 }
4581 
4582 /**
4583  * cik_cp_compute_enable - enable/disable the compute CP MEs
4584  *
4585  * @rdev: radeon_device pointer
4586  * @enable: enable or disable the MEs
4587  *
4588  * Halts or unhalts the compute MEs.
4589  */
4590 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4591 {
4592 	if (enable)
4593 		WREG32(CP_MEC_CNTL, 0);
4594 	else {
4595 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4596 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4597 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4598 	}
4599 	udelay(50);
4600 }
4601 
4602 /**
4603  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4604  *
4605  * @rdev: radeon_device pointer
4606  *
4607  * Loads the compute MEC1&2 ucode.
4608  * Returns 0 for success, -EINVAL if the ucode is not available.
4609  */
4610 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4611 {
4612 	int i;
4613 
4614 	if (!rdev->mec_fw)
4615 		return -EINVAL;
4616 
4617 	cik_cp_compute_enable(rdev, false);
4618 
4619 	if (rdev->new_fw) {
4620 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4621 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4622 		const __le32 *fw_data;
4623 		u32 fw_size;
4624 
4625 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4626 
4627 		/* MEC1 */
4628 		fw_data = (const __le32 *)
4629 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4630 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4631 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4632 		for (i = 0; i < fw_size; i++)
4633 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4634 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4635 
4636 		/* MEC2 */
4637 		if (rdev->family == CHIP_KAVERI) {
4638 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4639 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4640 
4641 			fw_data = (const __le32 *)
4642 				(rdev->mec2_fw->data +
4643 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4644 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4645 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4646 			for (i = 0; i < fw_size; i++)
4647 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4648 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4649 		}
4650 	} else {
4651 		const __be32 *fw_data;
4652 
4653 		/* MEC1 */
4654 		fw_data = (const __be32 *)rdev->mec_fw->data;
4655 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4656 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4657 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4658 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4659 
4660 		if (rdev->family == CHIP_KAVERI) {
4661 			/* MEC2 */
4662 			fw_data = (const __be32 *)rdev->mec_fw->data;
4663 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4664 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4665 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4666 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4667 		}
4668 	}
4669 
4670 	return 0;
4671 }
4672 
4673 /**
4674  * cik_cp_compute_start - start the compute queues
4675  *
4676  * @rdev: radeon_device pointer
4677  *
4678  * Enable the compute queues.
4679  * Returns 0 for success, error for failure.
4680  */
4681 static int cik_cp_compute_start(struct radeon_device *rdev)
4682 {
4683 	cik_cp_compute_enable(rdev, true);
4684 
4685 	return 0;
4686 }
4687 
4688 /**
4689  * cik_cp_compute_fini - stop the compute queues
4690  *
4691  * @rdev: radeon_device pointer
4692  *
4693  * Stop the compute queues and tear down the driver queue
4694  * info.
4695  */
4696 static void cik_cp_compute_fini(struct radeon_device *rdev)
4697 {
4698 	int i, idx, r;
4699 
4700 	cik_cp_compute_enable(rdev, false);
4701 
4702 	for (i = 0; i < 2; i++) {
4703 		if (i == 0)
4704 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4705 		else
4706 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4707 
4708 		if (rdev->ring[idx].mqd_obj) {
4709 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4710 			if (unlikely(r != 0))
4711 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4712 
4713 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4714 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4715 
4716 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4717 			rdev->ring[idx].mqd_obj = NULL;
4718 		}
4719 	}
4720 }
4721 
4722 static void cik_mec_fini(struct radeon_device *rdev)
4723 {
4724 	int r;
4725 
4726 	if (rdev->mec.hpd_eop_obj) {
4727 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4728 		if (unlikely(r != 0))
4729 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4730 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4731 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4732 
4733 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4734 		rdev->mec.hpd_eop_obj = NULL;
4735 	}
4736 }
4737 
4738 #define MEC_HPD_SIZE 2048
4739 
4740 static int cik_mec_init(struct radeon_device *rdev)
4741 {
4742 	int r;
4743 	u32 *hpd;
4744 
4745 	/*
4746 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4747 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4748 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4749 	 * be handled by KFD
4750 	 */
4751 	rdev->mec.num_mec = 1;
4752 	rdev->mec.num_pipe = 1;
4753 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4754 
4755 	if (rdev->mec.hpd_eop_obj == NULL) {
4756 		r = radeon_bo_create(rdev,
4757 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4758 				     PAGE_SIZE, true,
4759 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4760 				     &rdev->mec.hpd_eop_obj);
4761 		if (r) {
4762 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4763 			return r;
4764 		}
4765 	}
4766 
4767 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4768 	if (unlikely(r != 0)) {
4769 		cik_mec_fini(rdev);
4770 		return r;
4771 	}
4772 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4773 			  &rdev->mec.hpd_eop_gpu_addr);
4774 	if (r) {
4775 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4776 		cik_mec_fini(rdev);
4777 		return r;
4778 	}
4779 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4780 	if (r) {
4781 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4782 		cik_mec_fini(rdev);
4783 		return r;
4784 	}
4785 
4786 	/* clear memory.  Not sure if this is required or not */
4787 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4788 
4789 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4790 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4791 
4792 	return 0;
4793 }
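
/*
 * Sizing note (illustrative): with num_mec = num_pipe = 1 as set above, the
 * EOP buffer allocated in cik_mec_init() is 1 * 1 * MEC_HPD_SIZE * 2 = 4096
 * bytes, i.e. a single page backing the one pipe the driver keeps for
 * itself; the remaining pipes are managed by KFD.
 */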
4794 
4795 struct hqd_registers {
4797 	u32 cp_mqd_base_addr;
4798 	u32 cp_mqd_base_addr_hi;
4799 	u32 cp_hqd_active;
4800 	u32 cp_hqd_vmid;
4801 	u32 cp_hqd_persistent_state;
4802 	u32 cp_hqd_pipe_priority;
4803 	u32 cp_hqd_queue_priority;
4804 	u32 cp_hqd_quantum;
4805 	u32 cp_hqd_pq_base;
4806 	u32 cp_hqd_pq_base_hi;
4807 	u32 cp_hqd_pq_rptr;
4808 	u32 cp_hqd_pq_rptr_report_addr;
4809 	u32 cp_hqd_pq_rptr_report_addr_hi;
4810 	u32 cp_hqd_pq_wptr_poll_addr;
4811 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4812 	u32 cp_hqd_pq_doorbell_control;
4813 	u32 cp_hqd_pq_wptr;
4814 	u32 cp_hqd_pq_control;
4815 	u32 cp_hqd_ib_base_addr;
4816 	u32 cp_hqd_ib_base_addr_hi;
4817 	u32 cp_hqd_ib_rptr;
4818 	u32 cp_hqd_ib_control;
4819 	u32 cp_hqd_iq_timer;
4820 	u32 cp_hqd_iq_rptr;
4821 	u32 cp_hqd_dequeue_request;
4822 	u32 cp_hqd_dma_offload;
4823 	u32 cp_hqd_sema_cmd;
4824 	u32 cp_hqd_msg_type;
4825 	u32 cp_hqd_atomic0_preop_lo;
4826 	u32 cp_hqd_atomic0_preop_hi;
4827 	u32 cp_hqd_atomic1_preop_lo;
4828 	u32 cp_hqd_atomic1_preop_hi;
4829 	u32 cp_hqd_hq_scheduler0;
4830 	u32 cp_hqd_hq_scheduler1;
4831 	u32 cp_mqd_control;
4832 };
4833 
4834 struct bonaire_mqd {
4836 	u32 header;
4837 	u32 dispatch_initiator;
4838 	u32 dimensions[3];
4839 	u32 start_idx[3];
4840 	u32 num_threads[3];
4841 	u32 pipeline_stat_enable;
4842 	u32 perf_counter_enable;
4843 	u32 pgm[2];
4844 	u32 tba[2];
4845 	u32 tma[2];
4846 	u32 pgm_rsrc[2];
4847 	u32 vmid;
4848 	u32 resource_limits;
4849 	u32 static_thread_mgmt01[2];
4850 	u32 tmp_ring_size;
4851 	u32 static_thread_mgmt23[2];
4852 	u32 restart[3];
4853 	u32 thread_trace_enable;
4854 	u32 reserved1;
4855 	u32 user_data[16];
4856 	u32 vgtcs_invoke_count[2];
4857 	struct hqd_registers queue_state;
4858 	u32 dequeue_cntr;
4859 	u32 interrupt_queue[64];
4860 };
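
/*
 * Descriptive note: bonaire_mqd mirrors the Memory Queue Descriptor the CP
 * consumes when mapping a compute queue; the queue_state member shadows the
 * CP_HQD_* registers so queue state can be saved to and restored from
 * memory.  cik_cp_compute_resume() below fills in one MQD per compute ring.
 */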
4861 
4862 /**
4863  * cik_cp_compute_resume - setup the compute queue registers
4864  *
4865  * @rdev: radeon_device pointer
4866  *
4867  * Program the compute queues and test them to make sure they
4868  * are working.
4869  * Returns 0 for success, error for failure.
4870  */
4871 static int cik_cp_compute_resume(struct radeon_device *rdev)
4872 {
4873 	int r, i, j, idx;
4874 	u32 tmp;
4875 	bool use_doorbell = true;
4876 	u64 hqd_gpu_addr;
4877 	u64 mqd_gpu_addr;
4878 	u64 eop_gpu_addr;
4879 	u64 wb_gpu_addr;
4880 	u32 *buf;
4881 	struct bonaire_mqd *mqd;
4882 
4883 	r = cik_cp_compute_start(rdev);
4884 	if (r)
4885 		return r;
4886 
4887 	/* fix up chicken bits */
4888 	tmp = RREG32(CP_CPF_DEBUG);
4889 	tmp |= (1 << 23);
4890 	WREG32(CP_CPF_DEBUG, tmp);
4891 
4892 	/* init the pipes */
4893 	mutex_lock(&rdev->srbm_mutex);
4894 
4895 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4896 
4897 	cik_srbm_select(rdev, 0, 0, 0, 0);
4898 
4899 	/* write the EOP addr */
4900 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4901 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4902 
4903 	/* set the VMID assigned */
4904 	WREG32(CP_HPD_EOP_VMID, 0);
4905 
4906 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4907 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4908 	tmp &= ~EOP_SIZE_MASK;
4909 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4910 	WREG32(CP_HPD_EOP_CONTROL, tmp);
4911 
4912 	mutex_unlock(&rdev->srbm_mutex);
4913 
4914 	/* init the queues.  Just two for now. */
4915 	for (i = 0; i < 2; i++) {
4916 		if (i == 0)
4917 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4918 		else
4919 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4920 
4921 		if (rdev->ring[idx].mqd_obj == NULL) {
4922 			r = radeon_bo_create(rdev,
4923 					     sizeof(struct bonaire_mqd),
4924 					     PAGE_SIZE, true,
4925 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4926 					     NULL, &rdev->ring[idx].mqd_obj);
4927 			if (r) {
4928 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4929 				return r;
4930 			}
4931 		}
4932 
4933 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4934 		if (unlikely(r != 0)) {
4935 			cik_cp_compute_fini(rdev);
4936 			return r;
4937 		}
4938 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4939 				  &mqd_gpu_addr);
4940 		if (r) {
4941 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4942 			cik_cp_compute_fini(rdev);
4943 			return r;
4944 		}
4945 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4946 		if (r) {
4947 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4948 			cik_cp_compute_fini(rdev);
4949 			return r;
4950 		}
4951 
4952 		/* init the mqd struct */
4953 		memset(buf, 0, sizeof(struct bonaire_mqd));
4954 
4955 		mqd = (struct bonaire_mqd *)buf;
4956 		mqd->header = 0xC0310800;
4957 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4958 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4959 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4960 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4961 
4962 		mutex_lock(&rdev->srbm_mutex);
4963 		cik_srbm_select(rdev, rdev->ring[idx].me,
4964 				rdev->ring[idx].pipe,
4965 				rdev->ring[idx].queue, 0);
4966 
4967 		/* disable wptr polling */
4968 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4969 		tmp &= ~WPTR_POLL_EN;
4970 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4971 
4972 		/* enable doorbell? */
4973 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4974 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4975 		if (use_doorbell)
4976 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4977 		else
4978 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4979 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4980 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4981 
4982 		/* disable the queue if it's active */
4983 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4984 		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
4986 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4987 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4988 			for (j = 0; j < rdev->usec_timeout; j++) {
4989 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4990 					break;
4991 				udelay(1);
4992 			}
4993 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4994 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4995 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4996 		}
4997 
4998 		/* set the pointer to the MQD */
4999 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5000 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5001 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5002 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5003 		/* set MQD vmid to 0 */
5004 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5005 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5006 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5007 
		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5009 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5010 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5011 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5012 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5013 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5014 
5015 		/* set up the HQD, this is similar to CP_RB0_CNTL */
5016 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5017 		mqd->queue_state.cp_hqd_pq_control &=
5018 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5019 
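		/* the queue size is encoded like EOP_SIZE above:
		 * 2^(QUEUE_SIZE+1) dwords
		 */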
5020 		mqd->queue_state.cp_hqd_pq_control |=
5021 			order_base_2(rdev->ring[idx].ring_size / 8);
5022 		mqd->queue_state.cp_hqd_pq_control |=
5023 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5024 #ifdef __BIG_ENDIAN
5025 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5026 #endif
5027 		mqd->queue_state.cp_hqd_pq_control &=
5028 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5029 		mqd->queue_state.cp_hqd_pq_control |=
5030 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5031 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5032 
5033 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5034 		if (i == 0)
5035 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5036 		else
5037 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5038 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5039 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5040 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5041 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5042 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5043 
		/* set the wb address whether it's enabled or not */
5045 		if (i == 0)
5046 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5047 		else
5048 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5049 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5050 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5051 			upper_32_bits(wb_gpu_addr) & 0xffff;
5052 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5053 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5054 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5055 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5056 
5057 		/* enable the doorbell if requested */
5058 		if (use_doorbell) {
5059 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5060 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5061 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5062 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5063 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5064 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5065 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5066 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5067 
5068 		} else {
5069 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5070 		}
5071 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5072 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5073 
5074 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5075 		rdev->ring[idx].wptr = 0;
5076 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5077 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5078 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5079 
5080 		/* set the vmid for the queue */
5081 		mqd->queue_state.cp_hqd_vmid = 0;
5082 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5083 
5084 		/* activate the queue */
5085 		mqd->queue_state.cp_hqd_active = 1;
5086 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5087 
5088 		cik_srbm_select(rdev, 0, 0, 0, 0);
5089 		mutex_unlock(&rdev->srbm_mutex);
5090 
5091 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5092 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5093 
5094 		rdev->ring[idx].ready = true;
5095 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5096 		if (r)
5097 			rdev->ring[idx].ready = false;
5098 	}
5099 
5100 	return 0;
5101 }
5102 
5103 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5104 {
5105 	cik_cp_gfx_enable(rdev, enable);
5106 	cik_cp_compute_enable(rdev, enable);
5107 }
5108 
5109 static int cik_cp_load_microcode(struct radeon_device *rdev)
5110 {
5111 	int r;
5112 
5113 	r = cik_cp_gfx_load_microcode(rdev);
5114 	if (r)
5115 		return r;
5116 	r = cik_cp_compute_load_microcode(rdev);
5117 	if (r)
5118 		return r;
5119 
5120 	return 0;
5121 }
5122 
5123 static void cik_cp_fini(struct radeon_device *rdev)
5124 {
5125 	cik_cp_gfx_fini(rdev);
5126 	cik_cp_compute_fini(rdev);
5127 }
5128 
5129 static int cik_cp_resume(struct radeon_device *rdev)
5130 {
5131 	int r;
5132 
5133 	cik_enable_gui_idle_interrupt(rdev, false);
5134 
5135 	r = cik_cp_load_microcode(rdev);
5136 	if (r)
5137 		return r;
5138 
5139 	r = cik_cp_gfx_resume(rdev);
5140 	if (r)
5141 		return r;
5142 	r = cik_cp_compute_resume(rdev);
5143 	if (r)
5144 		return r;
5145 
5146 	cik_enable_gui_idle_interrupt(rdev, true);
5147 
5148 	return 0;
5149 }
5150 
5151 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5152 {
5153 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5154 		RREG32(GRBM_STATUS));
5155 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5156 		RREG32(GRBM_STATUS2));
5157 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5158 		RREG32(GRBM_STATUS_SE0));
5159 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5160 		RREG32(GRBM_STATUS_SE1));
5161 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5162 		RREG32(GRBM_STATUS_SE2));
5163 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5164 		RREG32(GRBM_STATUS_SE3));
5165 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5166 		RREG32(SRBM_STATUS));
5167 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5168 		RREG32(SRBM_STATUS2));
5169 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5170 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5171 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5172 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5173 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5174 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5175 		 RREG32(CP_STALLED_STAT1));
5176 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5177 		 RREG32(CP_STALLED_STAT2));
5178 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5179 		 RREG32(CP_STALLED_STAT3));
5180 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5181 		 RREG32(CP_CPF_BUSY_STAT));
5182 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5183 		 RREG32(CP_CPF_STALLED_STAT1));
5184 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5185 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5186 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5187 		 RREG32(CP_CPC_STALLED_STAT1));
5188 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5189 }
5190 
5191 /**
5192  * cik_gpu_check_soft_reset - check which blocks are busy
5193  *
5194  * @rdev: radeon_device pointer
5195  *
5196  * Check which blocks are busy and return the relevant reset
5197  * mask to be used by cik_gpu_soft_reset().
5198  * Returns a mask of the blocks to be reset.
5199  */
5200 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5201 {
5202 	u32 reset_mask = 0;
5203 	u32 tmp;
5204 
5205 	/* GRBM_STATUS */
5206 	tmp = RREG32(GRBM_STATUS);
5207 	if (tmp & (PA_BUSY | SC_BUSY |
5208 		   BCI_BUSY | SX_BUSY |
5209 		   TA_BUSY | VGT_BUSY |
5210 		   DB_BUSY | CB_BUSY |
5211 		   GDS_BUSY | SPI_BUSY |
5212 		   IA_BUSY | IA_BUSY_NO_DMA))
5213 		reset_mask |= RADEON_RESET_GFX;
5214 
5215 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5216 		reset_mask |= RADEON_RESET_CP;
5217 
5218 	/* GRBM_STATUS2 */
5219 	tmp = RREG32(GRBM_STATUS2);
5220 	if (tmp & RLC_BUSY)
5221 		reset_mask |= RADEON_RESET_RLC;
5222 
5223 	/* SDMA0_STATUS_REG */
5224 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5225 	if (!(tmp & SDMA_IDLE))
5226 		reset_mask |= RADEON_RESET_DMA;
5227 
5228 	/* SDMA1_STATUS_REG */
5229 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5230 	if (!(tmp & SDMA_IDLE))
5231 		reset_mask |= RADEON_RESET_DMA1;
5232 
5233 	/* SRBM_STATUS2 */
5234 	tmp = RREG32(SRBM_STATUS2);
5235 	if (tmp & SDMA_BUSY)
5236 		reset_mask |= RADEON_RESET_DMA;
5237 
5238 	if (tmp & SDMA1_BUSY)
5239 		reset_mask |= RADEON_RESET_DMA1;
5240 
5241 	/* SRBM_STATUS */
5242 	tmp = RREG32(SRBM_STATUS);
5243 
5244 	if (tmp & IH_BUSY)
5245 		reset_mask |= RADEON_RESET_IH;
5246 
5247 	if (tmp & SEM_BUSY)
5248 		reset_mask |= RADEON_RESET_SEM;
5249 
5250 	if (tmp & GRBM_RQ_PENDING)
5251 		reset_mask |= RADEON_RESET_GRBM;
5252 
5253 	if (tmp & VMC_BUSY)
5254 		reset_mask |= RADEON_RESET_VMC;
5255 
5256 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5257 		   MCC_BUSY | MCD_BUSY))
5258 		reset_mask |= RADEON_RESET_MC;
5259 
5260 	if (evergreen_is_display_hung(rdev))
5261 		reset_mask |= RADEON_RESET_DISPLAY;
5262 
	/* Skip MC reset as it's most likely not hung, just busy */
5264 	if (reset_mask & RADEON_RESET_MC) {
5265 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5266 		reset_mask &= ~RADEON_RESET_MC;
5267 	}
5268 
5269 	return reset_mask;
5270 }
5271 
5272 /**
5273  * cik_gpu_soft_reset - soft reset GPU
5274  *
5275  * @rdev: radeon_device pointer
5276  * @reset_mask: mask of which blocks to reset
5277  *
5278  * Soft reset the blocks specified in @reset_mask.
5279  */
5280 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5281 {
5282 	struct evergreen_mc_save save;
5283 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5284 	u32 tmp;
5285 
5286 	if (reset_mask == 0)
5287 		return;
5288 
5289 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5290 
5291 	cik_print_gpu_status_regs(rdev);
5292 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5293 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5294 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5295 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5296 
5297 	/* disable CG/PG */
5298 	cik_fini_pg(rdev);
5299 	cik_fini_cg(rdev);
5300 
5301 	/* stop the rlc */
5302 	cik_rlc_stop(rdev);
5303 
5304 	/* Disable GFX parsing/prefetching */
5305 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5306 
5307 	/* Disable MEC parsing/prefetching */
5308 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5309 
5310 	if (reset_mask & RADEON_RESET_DMA) {
5311 		/* sdma0 */
5312 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5313 		tmp |= SDMA_HALT;
5314 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5315 	}
5316 	if (reset_mask & RADEON_RESET_DMA1) {
5317 		/* sdma1 */
5318 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5319 		tmp |= SDMA_HALT;
5320 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5321 	}
5322 
5323 	evergreen_mc_stop(rdev, &save);
5324 	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5326 	}
5327 
5328 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5329 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5330 
5331 	if (reset_mask & RADEON_RESET_CP) {
5332 		grbm_soft_reset |= SOFT_RESET_CP;
5333 
5334 		srbm_soft_reset |= SOFT_RESET_GRBM;
5335 	}
5336 
5337 	if (reset_mask & RADEON_RESET_DMA)
5338 		srbm_soft_reset |= SOFT_RESET_SDMA;
5339 
5340 	if (reset_mask & RADEON_RESET_DMA1)
5341 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5342 
5343 	if (reset_mask & RADEON_RESET_DISPLAY)
5344 		srbm_soft_reset |= SOFT_RESET_DC;
5345 
5346 	if (reset_mask & RADEON_RESET_RLC)
5347 		grbm_soft_reset |= SOFT_RESET_RLC;
5348 
5349 	if (reset_mask & RADEON_RESET_SEM)
5350 		srbm_soft_reset |= SOFT_RESET_SEM;
5351 
5352 	if (reset_mask & RADEON_RESET_IH)
5353 		srbm_soft_reset |= SOFT_RESET_IH;
5354 
5355 	if (reset_mask & RADEON_RESET_GRBM)
5356 		srbm_soft_reset |= SOFT_RESET_GRBM;
5357 
5358 	if (reset_mask & RADEON_RESET_VMC)
5359 		srbm_soft_reset |= SOFT_RESET_VMC;
5360 
5361 	if (!(rdev->flags & RADEON_IS_IGP)) {
5362 		if (reset_mask & RADEON_RESET_MC)
5363 			srbm_soft_reset |= SOFT_RESET_MC;
5364 	}
5365 
5366 	if (grbm_soft_reset) {
5367 		tmp = RREG32(GRBM_SOFT_RESET);
5368 		tmp |= grbm_soft_reset;
5369 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5370 		WREG32(GRBM_SOFT_RESET, tmp);
5371 		tmp = RREG32(GRBM_SOFT_RESET);
5372 
5373 		udelay(50);
5374 
5375 		tmp &= ~grbm_soft_reset;
5376 		WREG32(GRBM_SOFT_RESET, tmp);
5377 		tmp = RREG32(GRBM_SOFT_RESET);
5378 	}
5379 
5380 	if (srbm_soft_reset) {
5381 		tmp = RREG32(SRBM_SOFT_RESET);
5382 		tmp |= srbm_soft_reset;
5383 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5384 		WREG32(SRBM_SOFT_RESET, tmp);
5385 		tmp = RREG32(SRBM_SOFT_RESET);
5386 
5387 		udelay(50);
5388 
5389 		tmp &= ~srbm_soft_reset;
5390 		WREG32(SRBM_SOFT_RESET, tmp);
5391 		tmp = RREG32(SRBM_SOFT_RESET);
5392 	}
5393 
5394 	/* Wait a little for things to settle down */
5395 	udelay(50);
5396 
5397 	evergreen_mc_resume(rdev, &save);
5398 	udelay(50);
5399 
5400 	cik_print_gpu_status_regs(rdev);
5401 }
5402 
5403 struct kv_reset_save_regs {
5404 	u32 gmcon_reng_execute;
5405 	u32 gmcon_misc;
5406 	u32 gmcon_misc3;
5407 };
5408 
5409 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5410 				   struct kv_reset_save_regs *save)
5411 {
5412 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5413 	save->gmcon_misc = RREG32(GMCON_MISC);
5414 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5415 
5416 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5417 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5418 						STCTRL_STUTTER_EN));
5419 }
5420 
5421 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5422 				      struct kv_reset_save_regs *save)
5423 {
5424 	int i;
5425 
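	/* replay the GMCON PGFSM (power gating state machine) command
	 * sequences so the MC state machines come back up cleanly after
	 * the reset
	 */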
5426 	WREG32(GMCON_PGFSM_WRITE, 0);
5427 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5428 
5429 	for (i = 0; i < 5; i++)
5430 		WREG32(GMCON_PGFSM_WRITE, 0);
5431 
5432 	WREG32(GMCON_PGFSM_WRITE, 0);
5433 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5434 
5435 	for (i = 0; i < 5; i++)
5436 		WREG32(GMCON_PGFSM_WRITE, 0);
5437 
5438 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5439 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5440 
5441 	for (i = 0; i < 5; i++)
5442 		WREG32(GMCON_PGFSM_WRITE, 0);
5443 
5444 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5445 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5446 
5447 	for (i = 0; i < 5; i++)
5448 		WREG32(GMCON_PGFSM_WRITE, 0);
5449 
5450 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5451 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5452 
5453 	for (i = 0; i < 5; i++)
5454 		WREG32(GMCON_PGFSM_WRITE, 0);
5455 
5456 	WREG32(GMCON_PGFSM_WRITE, 0);
5457 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5458 
5459 	for (i = 0; i < 5; i++)
5460 		WREG32(GMCON_PGFSM_WRITE, 0);
5461 
5462 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5463 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5464 
5465 	for (i = 0; i < 5; i++)
5466 		WREG32(GMCON_PGFSM_WRITE, 0);
5467 
5468 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5469 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5470 
5471 	for (i = 0; i < 5; i++)
5472 		WREG32(GMCON_PGFSM_WRITE, 0);
5473 
5474 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5475 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5476 
5477 	for (i = 0; i < 5; i++)
5478 		WREG32(GMCON_PGFSM_WRITE, 0);
5479 
5480 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5481 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5482 
5483 	for (i = 0; i < 5; i++)
5484 		WREG32(GMCON_PGFSM_WRITE, 0);
5485 
5486 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5487 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5488 
5489 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5490 	WREG32(GMCON_MISC, save->gmcon_misc);
5491 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5492 }
5493 
5494 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5495 {
5496 	struct evergreen_mc_save save;
5497 	struct kv_reset_save_regs kv_save = { 0 };
5498 	u32 tmp, i;
5499 
5500 	dev_info(rdev->dev, "GPU pci config reset\n");
5501 
5502 	/* disable dpm? */
5503 
5504 	/* disable cg/pg */
5505 	cik_fini_pg(rdev);
5506 	cik_fini_cg(rdev);
5507 
5508 	/* Disable GFX parsing/prefetching */
5509 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5510 
5511 	/* Disable MEC parsing/prefetching */
5512 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5513 
5514 	/* sdma0 */
5515 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5516 	tmp |= SDMA_HALT;
5517 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5518 	/* sdma1 */
5519 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5520 	tmp |= SDMA_HALT;
5521 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5522 	/* XXX other engines? */
5523 
5524 	/* halt the rlc, disable cp internal ints */
5525 	cik_rlc_stop(rdev);
5526 
5527 	udelay(50);
5528 
5529 	/* disable mem access */
5530 	evergreen_mc_stop(rdev, &save);
5531 	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5533 	}
5534 
5535 	if (rdev->flags & RADEON_IS_IGP)
5536 		kv_save_regs_for_reset(rdev, &kv_save);
5537 
5538 	/* disable BM */
5539 	pci_clear_master(rdev->pdev);
5540 	/* reset */
5541 	radeon_pci_config_reset(rdev);
5542 
5543 	udelay(100);
5544 
5545 	/* wait for asic to come out of reset */
5546 	for (i = 0; i < rdev->usec_timeout; i++) {
5547 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5548 			break;
5549 		udelay(1);
5550 	}
5551 
5552 	/* does asic init need to be run first??? */
5553 	if (rdev->flags & RADEON_IS_IGP)
5554 		kv_restore_regs_for_reset(rdev, &kv_save);
5555 }
5556 
5557 /**
5558  * cik_asic_reset - soft reset GPU
5559  *
5560  * @rdev: radeon_device pointer
5561  *
5562  * Look up which blocks are hung and attempt
5563  * to reset them.
5564  * Returns 0 for success.
5565  */
5566 int cik_asic_reset(struct radeon_device *rdev)
5567 {
5568 	u32 reset_mask;
5569 
5570 	reset_mask = cik_gpu_check_soft_reset(rdev);
5571 
5572 	if (reset_mask)
5573 		r600_set_bios_scratch_engine_hung(rdev, true);
5574 
5575 	/* try soft reset */
5576 	cik_gpu_soft_reset(rdev, reset_mask);
5577 
5578 	reset_mask = cik_gpu_check_soft_reset(rdev);
5579 
5580 	/* try pci config reset */
5581 	if (reset_mask && radeon_hard_reset)
5582 		cik_gpu_pci_config_reset(rdev);
5583 
5584 	reset_mask = cik_gpu_check_soft_reset(rdev);
5585 
5586 	if (!reset_mask)
5587 		r600_set_bios_scratch_engine_hung(rdev, false);
5588 
5589 	return 0;
5590 }
5591 
5592 /**
5593  * cik_gfx_is_lockup - check if the 3D engine is locked up
5594  *
5595  * @rdev: radeon_device pointer
5596  * @ring: radeon_ring structure holding ring information
5597  *
5598  * Check if the 3D engine is locked up (CIK).
5599  * Returns true if the engine is locked, false if not.
5600  */
5601 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5602 {
5603 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5604 
5605 	if (!(reset_mask & (RADEON_RESET_GFX |
5606 			    RADEON_RESET_COMPUTE |
5607 			    RADEON_RESET_CP))) {
5608 		radeon_ring_lockup_update(rdev, ring);
5609 		return false;
5610 	}
5611 	return radeon_ring_test_lockup(rdev, ring);
5612 }
5613 
5614 /* MC */
5615 /**
5616  * cik_mc_program - program the GPU memory controller
5617  *
5618  * @rdev: radeon_device pointer
5619  *
5620  * Set the location of vram, gart, and AGP in the GPU's
5621  * physical address space (CIK).
5622  */
5623 static void cik_mc_program(struct radeon_device *rdev)
5624 {
5625 	struct evergreen_mc_save save;
5626 	u32 tmp;
5627 	int i, j;
5628 
5629 	/* Initialize HDP */
5630 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5631 		WREG32((0x2c14 + j), 0x00000000);
5632 		WREG32((0x2c18 + j), 0x00000000);
5633 		WREG32((0x2c1c + j), 0x00000000);
5634 		WREG32((0x2c20 + j), 0x00000000);
5635 		WREG32((0x2c24 + j), 0x00000000);
5636 	}
5637 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5638 
5639 	evergreen_mc_stop(rdev, &save);
5640 	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5642 	}
	/* Lock out access through the VGA aperture */
5644 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5645 	/* Update configuration */
5646 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5647 	       rdev->mc.vram_start >> 12);
5648 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5649 	       rdev->mc.vram_end >> 12);
5650 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5651 	       rdev->vram_scratch.gpu_addr >> 12);
5652 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5653 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5654 	WREG32(MC_VM_FB_LOCATION, tmp);
5655 	/* XXX double check these! */
5656 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5657 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5658 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5659 	WREG32(MC_VM_AGP_BASE, 0);
5660 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5661 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5662 	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5664 	}
5665 	evergreen_mc_resume(rdev, &save);
5666 	/* we need to own VRAM, so turn off the VGA renderer here
5667 	 * to stop it overwriting our objects */
5668 	rv515_vga_render_disable(rdev);
5669 }
5670 
5671 /**
5672  * cik_mc_init - initialize the memory controller driver params
5673  *
5674  * @rdev: radeon_device pointer
5675  *
5676  * Look up the amount of vram, vram width, and decide how to place
5677  * vram and gart within the GPU's physical address space (CIK).
5678  * Returns 0 for success.
5679  */
5680 static int cik_mc_init(struct radeon_device *rdev)
5681 {
5682 	u32 tmp;
5683 	int chansize, numchan;
5684 
	/* Get VRAM information */
5686 	rdev->mc.vram_is_ddr = true;
5687 	tmp = RREG32(MC_ARB_RAMCFG);
5688 	if (tmp & CHANSIZE_MASK) {
5689 		chansize = 64;
5690 	} else {
5691 		chansize = 32;
5692 	}
5693 	tmp = RREG32(MC_SHARED_CHMAP);
5694 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5695 	case 0:
5696 	default:
5697 		numchan = 1;
5698 		break;
5699 	case 1:
5700 		numchan = 2;
5701 		break;
5702 	case 2:
5703 		numchan = 4;
5704 		break;
5705 	case 3:
5706 		numchan = 8;
5707 		break;
5708 	case 4:
5709 		numchan = 3;
5710 		break;
5711 	case 5:
5712 		numchan = 6;
5713 		break;
5714 	case 6:
5715 		numchan = 10;
5716 		break;
5717 	case 7:
5718 		numchan = 12;
5719 		break;
5720 	case 8:
5721 		numchan = 16;
5722 		break;
5723 	}
5724 	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0? */
5726 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5727 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on CIK */
5729 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5730 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5731 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5732 	si_vram_gtt_location(rdev, &rdev->mc);
5733 	radeon_update_bandwidth_info(rdev);
5734 
5735 	return 0;
5736 }
5737 
5738 /*
5739  * GART
5740  * VMID 0 is the physical GPU addresses as used by the kernel.
5741  * VMIDs 1-15 are used for userspace clients and are handled
5742  * by the radeon vm/hsa code.
5743  */
5744 /**
5745  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5746  *
5747  * @rdev: radeon_device pointer
5748  *
5749  * Flush the TLB for the VMID 0 page table (CIK).
5750  */
5751 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5752 {
5753 	/* flush hdp cache */
5754 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5755 
	/* bits 0-15 are the VM contexts 0-15 */
5757 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5758 }
5759 
5760 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5761 {
5762 	int i;
5763 	uint32_t sh_mem_bases, sh_mem_config;
5764 
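	/* program the SH_MEM_* aperture state for the compute VMIDs (8-15)
	 * that are managed by amdkfd
	 */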
5765 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5766 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5767 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5768 
5769 	mutex_lock(&rdev->srbm_mutex);
5770 	for (i = 8; i < 16; i++) {
5771 		cik_srbm_select(rdev, 0, 0, 0, i);
5772 		/* CP and shaders */
5773 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5774 		WREG32(SH_MEM_APE1_BASE, 1);
5775 		WREG32(SH_MEM_APE1_LIMIT, 0);
5776 		WREG32(SH_MEM_BASES, sh_mem_bases);
5777 	}
5778 	cik_srbm_select(rdev, 0, 0, 0, 0);
5779 	mutex_unlock(&rdev->srbm_mutex);
5780 }
5781 
5782 /**
5783  * cik_pcie_gart_enable - gart enable
5784  *
5785  * @rdev: radeon_device pointer
5786  *
5787  * This sets up the TLBs, programs the page tables for VMID0,
5788  * sets up the hw for VMIDs 1-15 which are allocated on
5789  * demand, and sets up the global locations for the LDS, GDS,
5790  * and GPUVM for FSA64 clients (CIK).
5791  * Returns 0 for success, errors for failure.
5792  */
5793 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5794 {
5795 	int r, i;
5796 
5797 	if (rdev->gart.robj == NULL) {
5798 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5799 		return -EINVAL;
5800 	}
5801 	r = radeon_gart_table_vram_pin(rdev);
5802 	if (r)
5803 		return r;
5804 	/* Setup TLB control */
5805 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5806 	       (0xA << 7) |
5807 	       ENABLE_L1_TLB |
5808 	       ENABLE_L1_FRAGMENT_PROCESSING |
5809 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5810 	       ENABLE_ADVANCED_DRIVER_MODEL |
5811 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5812 	/* Setup L2 cache */
5813 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5814 	       ENABLE_L2_FRAGMENT_PROCESSING |
5815 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5816 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5817 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5818 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5819 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5820 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5821 	       BANK_SELECT(4) |
5822 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5823 	/* setup context0 */
5824 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5825 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5826 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5827 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5828 			(u32)(rdev->dummy_page.addr >> 12));
5829 	WREG32(VM_CONTEXT0_CNTL2, 0);
5830 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5831 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5832 
5833 	WREG32(0x15D4, 0);
5834 	WREG32(0x15D8, 0);
5835 	WREG32(0x15DC, 0);
5836 
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* restore the context1-15 page table base addresses */
	for (i = 1; i < 16; i++) {
5842 		if (i < 8)
5843 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5844 			       rdev->vm_manager.saved_table_addr[i]);
5845 		else
5846 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5847 			       rdev->vm_manager.saved_table_addr[i]);
5848 	}
5849 
5850 	/* enable context1-15 */
5851 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5852 	       (u32)(rdev->dummy_page.addr >> 12));
5853 	WREG32(VM_CONTEXT1_CNTL2, 4);
5854 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5855 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5856 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5857 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5858 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5859 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5860 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5861 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5862 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5863 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5864 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5865 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5866 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5867 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5868 
5869 	if (rdev->family == CHIP_KAVERI) {
5870 		u32 tmp = RREG32(CHUB_CONTROL);
5871 		tmp &= ~BYPASS_VM;
5872 		WREG32(CHUB_CONTROL, tmp);
5873 	}
5874 
5875 	/* XXX SH_MEM regs */
5876 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5877 	mutex_lock(&rdev->srbm_mutex);
5878 	for (i = 0; i < 16; i++) {
5879 		cik_srbm_select(rdev, 0, 0, 0, i);
5880 		/* CP and shaders */
5881 		WREG32(SH_MEM_CONFIG, 0);
5882 		WREG32(SH_MEM_APE1_BASE, 1);
5883 		WREG32(SH_MEM_APE1_LIMIT, 0);
5884 		WREG32(SH_MEM_BASES, 0);
5885 		/* SDMA GFX */
5886 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5887 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5888 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5889 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5890 		/* XXX SDMA RLC - todo */
5891 	}
5892 	cik_srbm_select(rdev, 0, 0, 0, 0);
5893 	mutex_unlock(&rdev->srbm_mutex);
5894 
5895 	cik_pcie_init_compute_vmid(rdev);
5896 
5897 	cik_pcie_gart_tlb_flush(rdev);
5898 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5899 		 (unsigned)(rdev->mc.gtt_size >> 20),
5900 		 (unsigned long long)rdev->gart.table_addr);
5901 	rdev->gart.ready = true;
5902 	return 0;
5903 }
5904 
5905 /**
5906  * cik_pcie_gart_disable - gart disable
5907  *
5908  * @rdev: radeon_device pointer
5909  *
 * This disables all VM page tables (CIK).
5911  */
5912 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5913 {
5914 	unsigned i;
5915 
5916 	for (i = 1; i < 16; ++i) {
5917 		uint32_t reg;
5918 		if (i < 8)
5919 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5920 		else
5921 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5922 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5923 	}
5924 
5925 	/* Disable all tables */
5926 	WREG32(VM_CONTEXT0_CNTL, 0);
5927 	WREG32(VM_CONTEXT1_CNTL, 0);
5928 	/* Setup TLB control */
5929 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5930 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5931 	/* Setup L2 cache */
5932 	WREG32(VM_L2_CNTL,
5933 	       ENABLE_L2_FRAGMENT_PROCESSING |
5934 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5935 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5936 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5937 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5938 	WREG32(VM_L2_CNTL2, 0);
5939 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5940 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5941 	radeon_gart_table_vram_unpin(rdev);
5942 }
5943 
5944 /**
5945  * cik_pcie_gart_fini - vm fini callback
5946  *
5947  * @rdev: radeon_device pointer
5948  *
5949  * Tears down the driver GART/VM setup (CIK).
5950  */
5951 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5952 {
5953 	cik_pcie_gart_disable(rdev);
5954 	radeon_gart_table_vram_free(rdev);
5955 	radeon_gart_fini(rdev);
5956 }
5957 
5958 /* vm parser */
5959 /**
5960  * cik_ib_parse - vm ib_parse callback
5961  *
5962  * @rdev: radeon_device pointer
5963  * @ib: indirect buffer pointer
5964  *
5965  * CIK uses hw IB checking so this is a nop (CIK).
5966  */
5967 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5968 {
5969 	return 0;
5970 }
5971 
5972 /*
5973  * vm
5974  * VMID 0 is the physical GPU addresses as used by the kernel.
5975  * VMIDs 1-15 are used for userspace clients and are handled
5976  * by the radeon vm/hsa code.
5977  */
5978 /**
5979  * cik_vm_init - cik vm init callback
5980  *
5981  * @rdev: radeon_device pointer
5982  *
5983  * Inits cik specific vm parameters (number of VMs, base of vram for
5984  * VMIDs 1-15) (CIK).
5985  * Returns 0 for success.
5986  */
5987 int cik_vm_init(struct radeon_device *rdev)
5988 {
5989 	/*
5990 	 * number of VMs
5991 	 * VMID 0 is reserved for System
5992 	 * radeon graphics/compute will use VMIDs 1-7
5993 	 * amdkfd will use VMIDs 8-15
5994 	 */
5995 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5996 	/* base offset of vram pages */
5997 	if (rdev->flags & RADEON_IS_IGP) {
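		/* MC_VM_FB_OFFSET appears to be in 4MB units, hence the
		 * shift by 22
		 */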
5998 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5999 		tmp <<= 22;
6000 		rdev->vm_manager.vram_base_offset = tmp;
	} else {
		rdev->vm_manager.vram_base_offset = 0;
	}
6003 
6004 	return 0;
6005 }
6006 
6007 /**
6008  * cik_vm_fini - cik vm fini callback
6009  *
6010  * @rdev: radeon_device pointer
6011  *
6012  * Tear down any asic specific VM setup (CIK).
6013  */
6014 void cik_vm_fini(struct radeon_device *rdev)
6015 {
6016 }
6017 
6018 /**
6019  * cik_vm_decode_fault - print human readable fault info
6020  *
6021  * @rdev: radeon_device pointer
6022  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
6024  *
6025  * Print human readable fault information (CIK).
6026  */
6027 static void cik_vm_decode_fault(struct radeon_device *rdev,
6028 				u32 status, u32 addr, u32 mc_client)
6029 {
6030 	u32 mc_id;
6031 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6032 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6033 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6034 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6035 
6036 	if (rdev->family == CHIP_HAWAII)
6037 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6038 	else
6039 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6040 
6041 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6042 	       protections, vmid, addr,
6043 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6044 	       block, mc_client, mc_id);
6045 }
6046 
6047 /**
6048  * cik_vm_flush - cik vm flush using the CP
6049  *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID of the VM to flush
 * @pd_addr: page directory base address for the VM
6051  *
6052  * Update the page table base and flush the VM TLB
6053  * using the CP (CIK).
6054  */
6055 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6056 		  unsigned vm_id, uint64_t pd_addr)
6057 {
6058 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6059 
6060 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6061 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6062 				 WRITE_DATA_DST_SEL(0)));
6063 	if (vm_id < 8) {
6064 		radeon_ring_write(ring,
6065 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6066 	} else {
6067 		radeon_ring_write(ring,
6068 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6069 	}
6070 	radeon_ring_write(ring, 0);
6071 	radeon_ring_write(ring, pd_addr >> 12);
6072 
6073 	/* update SH_MEM_* regs */
6074 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6075 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6076 				 WRITE_DATA_DST_SEL(0)));
6077 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6078 	radeon_ring_write(ring, 0);
6079 	radeon_ring_write(ring, VMID(vm_id));
6080 
6081 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6082 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6083 				 WRITE_DATA_DST_SEL(0)));
6084 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6085 	radeon_ring_write(ring, 0);
6086 
6087 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6088 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6089 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6090 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6091 
6092 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6093 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6094 				 WRITE_DATA_DST_SEL(0)));
6095 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6096 	radeon_ring_write(ring, 0);
6097 	radeon_ring_write(ring, VMID(0));
6098 
6099 	/* HDP flush */
6100 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6101 
	/* bits 0-15 are the VM contexts 0-15 */
6103 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6104 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6105 				 WRITE_DATA_DST_SEL(0)));
6106 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6107 	radeon_ring_write(ring, 0);
6108 	radeon_ring_write(ring, 1 << vm_id);
6109 
6110 	/* wait for the invalidate to complete */
6111 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6112 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6113 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6114 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6115 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6116 	radeon_ring_write(ring, 0);
6117 	radeon_ring_write(ring, 0); /* ref */
6118 	radeon_ring_write(ring, 0); /* mask */
6119 	radeon_ring_write(ring, 0x20); /* poll interval */
6120 
6121 	/* compute doesn't have PFP */
6122 	if (usepfp) {
6123 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6124 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6125 		radeon_ring_write(ring, 0x0);
6126 	}
6127 }
6128 
6129 /*
6130  * RLC
6131  * The RLC is a multi-purpose microengine that handles a
6132  * variety of functions, the most important of which is
 * serving as the interrupt controller.
6134  */
6135 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6136 					  bool enable)
6137 {
6138 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6139 
6140 	if (enable)
6141 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6142 	else
6143 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6144 	WREG32(CP_INT_CNTL_RING0, tmp);
6145 }
6146 
6147 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6148 {
6149 	u32 tmp;
6150 
6151 	tmp = RREG32(RLC_LB_CNTL);
6152 	if (enable)
6153 		tmp |= LOAD_BALANCE_ENABLE;
6154 	else
6155 		tmp &= ~LOAD_BALANCE_ENABLE;
6156 	WREG32(RLC_LB_CNTL, tmp);
6157 }
6158 
6159 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6160 {
6161 	u32 i, j, k;
6162 	u32 mask;
6163 
6164 	mutex_lock(&rdev->grbm_idx_mutex);
6165 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6166 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6167 			cik_select_se_sh(rdev, i, j);
6168 			for (k = 0; k < rdev->usec_timeout; k++) {
6169 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6170 					break;
6171 				udelay(1);
6172 			}
6173 		}
6174 	}
6175 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6176 	mutex_unlock(&rdev->grbm_idx_mutex);
6177 
6178 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6179 	for (k = 0; k < rdev->usec_timeout; k++) {
6180 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6181 			break;
6182 		udelay(1);
6183 	}
6184 }
6185 
6186 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6187 {
6188 	u32 tmp;
6189 
6190 	tmp = RREG32(RLC_CNTL);
6191 	if (tmp != rlc)
6192 		WREG32(RLC_CNTL, rlc);
6193 }
6194 
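/*
 * Halt the RLC and return the previous RLC_CNTL value so the caller
 * can restore it later with cik_update_rlc().
 */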
6195 static u32 cik_halt_rlc(struct radeon_device *rdev)
6196 {
6197 	u32 data, orig;
6198 
6199 	orig = data = RREG32(RLC_CNTL);
6200 
6201 	if (data & RLC_ENABLE) {
6202 		u32 i;
6203 
6204 		data &= ~RLC_ENABLE;
6205 		WREG32(RLC_CNTL, data);
6206 
6207 		for (i = 0; i < rdev->usec_timeout; i++) {
6208 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6209 				break;
6210 			udelay(1);
6211 		}
6212 
6213 		cik_wait_for_rlc_serdes(rdev);
6214 	}
6215 
6216 	return orig;
6217 }
6218 
6219 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6220 {
6221 	u32 tmp, i, mask;
6222 
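	/* request safe mode via RLC_GPR_REG2, then wait for the power and
	 * clock status bits and for the RLC to ack by clearing REQ
	 */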
6223 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6224 	WREG32(RLC_GPR_REG2, tmp);
6225 
6226 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6227 	for (i = 0; i < rdev->usec_timeout; i++) {
6228 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6229 			break;
6230 		udelay(1);
6231 	}
6232 
6233 	for (i = 0; i < rdev->usec_timeout; i++) {
6234 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6235 			break;
6236 		udelay(1);
6237 	}
6238 }
6239 
6240 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6241 {
6242 	u32 tmp;
6243 
6244 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6245 	WREG32(RLC_GPR_REG2, tmp);
6246 }
6247 
6248 /**
6249  * cik_rlc_stop - stop the RLC ME
6250  *
6251  * @rdev: radeon_device pointer
6252  *
6253  * Halt the RLC ME (MicroEngine) (CIK).
6254  */
6255 static void cik_rlc_stop(struct radeon_device *rdev)
6256 {
6257 	WREG32(RLC_CNTL, 0);
6258 
6259 	cik_enable_gui_idle_interrupt(rdev, false);
6260 
6261 	cik_wait_for_rlc_serdes(rdev);
6262 }
6263 
6264 /**
6265  * cik_rlc_start - start the RLC ME
6266  *
6267  * @rdev: radeon_device pointer
6268  *
6269  * Unhalt the RLC ME (MicroEngine) (CIK).
6270  */
6271 static void cik_rlc_start(struct radeon_device *rdev)
6272 {
6273 	WREG32(RLC_CNTL, RLC_ENABLE);
6274 
6275 	cik_enable_gui_idle_interrupt(rdev, true);
6276 
6277 	udelay(50);
6278 }
6279 
6280 /**
6281  * cik_rlc_resume - setup the RLC hw
6282  *
6283  * @rdev: radeon_device pointer
6284  *
6285  * Initialize the RLC registers, load the ucode,
6286  * and start the RLC (CIK).
6287  * Returns 0 for success, -EINVAL if the ucode is not available.
6288  */
6289 static int cik_rlc_resume(struct radeon_device *rdev)
6290 {
6291 	u32 i, size, tmp;
6292 
6293 	if (!rdev->rlc_fw)
6294 		return -EINVAL;
6295 
6296 	cik_rlc_stop(rdev);
6297 
6298 	/* disable CG */
6299 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6300 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6301 
6302 	si_rlc_reset(rdev);
6303 
6304 	cik_init_pg(rdev);
6305 
6306 	cik_init_cg(rdev);
6307 
6308 	WREG32(RLC_LB_CNTR_INIT, 0);
6309 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6310 
6311 	mutex_lock(&rdev->grbm_idx_mutex);
6312 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6313 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6314 	WREG32(RLC_LB_PARAMS, 0x00600408);
6315 	WREG32(RLC_LB_CNTL, 0x80000004);
6316 	mutex_unlock(&rdev->grbm_idx_mutex);
6317 
6318 	WREG32(RLC_MC_CNTL, 0);
6319 	WREG32(RLC_UCODE_CNTL, 0);
6320 
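	/* two ucode container formats: new-style firmware carries a
	 * little-endian header with an explicit size, legacy blobs are
	 * big-endian with a fixed per-asic size
	 */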
6321 	if (rdev->new_fw) {
6322 		const struct rlc_firmware_header_v1_0 *hdr =
6323 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6324 		const __le32 *fw_data = (const __le32 *)
6325 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6326 
6327 		radeon_ucode_print_rlc_hdr(&hdr->header);
6328 
6329 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6330 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6331 		for (i = 0; i < size; i++)
6332 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6333 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6334 	} else {
6335 		const __be32 *fw_data;
6336 
6337 		switch (rdev->family) {
6338 		case CHIP_BONAIRE:
6339 		case CHIP_HAWAII:
6340 		default:
6341 			size = BONAIRE_RLC_UCODE_SIZE;
6342 			break;
6343 		case CHIP_KAVERI:
6344 			size = KV_RLC_UCODE_SIZE;
6345 			break;
6346 		case CHIP_KABINI:
6347 			size = KB_RLC_UCODE_SIZE;
6348 			break;
6349 		case CHIP_MULLINS:
6350 			size = ML_RLC_UCODE_SIZE;
6351 			break;
6352 		}
6353 
6354 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6355 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6356 		for (i = 0; i < size; i++)
6357 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6358 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6359 	}
6360 
6361 	/* XXX - find out what chips support lbpw */
6362 	cik_enable_lbpw(rdev, false);
6363 
6364 	if (rdev->family == CHIP_BONAIRE)
6365 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6366 
6367 	cik_rlc_start(rdev);
6368 
6369 	return 0;
6370 }
6371 
6372 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6373 {
6374 	u32 data, orig, tmp, tmp2;
6375 
6376 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6377 
6378 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6379 		cik_enable_gui_idle_interrupt(rdev, true);
6380 
6381 		tmp = cik_halt_rlc(rdev);
6382 
6383 		mutex_lock(&rdev->grbm_idx_mutex);
6384 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6385 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6386 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6387 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6388 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6389 		mutex_unlock(&rdev->grbm_idx_mutex);
6390 
6391 		cik_update_rlc(rdev, tmp);
6392 
6393 		data |= CGCG_EN | CGLS_EN;
6394 	} else {
6395 		cik_enable_gui_idle_interrupt(rdev, false);
6396 
6397 		RREG32(CB_CGTT_SCLK_CTRL);
6398 		RREG32(CB_CGTT_SCLK_CTRL);
6399 		RREG32(CB_CGTT_SCLK_CTRL);
6400 		RREG32(CB_CGTT_SCLK_CTRL);
6401 
6402 		data &= ~(CGCG_EN | CGLS_EN);
6403 	}
6404 
6405 	if (orig != data)
6406 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6408 }
6409 
6410 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6411 {
6412 	u32 data, orig, tmp = 0;
6413 
6414 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6415 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6416 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6417 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6418 				data |= CP_MEM_LS_EN;
6419 				if (orig != data)
6420 					WREG32(CP_MEM_SLP_CNTL, data);
6421 			}
6422 		}
6423 
6424 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6425 		data |= 0x00000001;
6426 		data &= 0xfffffffd;
6427 		if (orig != data)
6428 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6429 
6430 		tmp = cik_halt_rlc(rdev);
6431 
6432 		mutex_lock(&rdev->grbm_idx_mutex);
6433 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6434 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6435 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6436 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6437 		WREG32(RLC_SERDES_WR_CTRL, data);
6438 		mutex_unlock(&rdev->grbm_idx_mutex);
6439 
6440 		cik_update_rlc(rdev, tmp);
6441 
6442 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6443 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6444 			data &= ~SM_MODE_MASK;
6445 			data |= SM_MODE(0x2);
6446 			data |= SM_MODE_ENABLE;
6447 			data &= ~CGTS_OVERRIDE;
6448 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6449 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6450 				data &= ~CGTS_LS_OVERRIDE;
6451 			data &= ~ON_MONITOR_ADD_MASK;
6452 			data |= ON_MONITOR_ADD_EN;
6453 			data |= ON_MONITOR_ADD(0x96);
6454 			if (orig != data)
6455 				WREG32(CGTS_SM_CTRL_REG, data);
6456 		}
6457 	} else {
6458 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6459 		data |= 0x00000003;
6460 		if (orig != data)
6461 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6462 
6463 		data = RREG32(RLC_MEM_SLP_CNTL);
6464 		if (data & RLC_MEM_LS_EN) {
6465 			data &= ~RLC_MEM_LS_EN;
6466 			WREG32(RLC_MEM_SLP_CNTL, data);
6467 		}
6468 
6469 		data = RREG32(CP_MEM_SLP_CNTL);
6470 		if (data & CP_MEM_LS_EN) {
6471 			data &= ~CP_MEM_LS_EN;
6472 			WREG32(CP_MEM_SLP_CNTL, data);
6473 		}
6474 
6475 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6476 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6477 		if (orig != data)
6478 			WREG32(CGTS_SM_CTRL_REG, data);
6479 
6480 		tmp = cik_halt_rlc(rdev);
6481 
6482 		mutex_lock(&rdev->grbm_idx_mutex);
6483 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6484 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6485 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6486 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6487 		WREG32(RLC_SERDES_WR_CTRL, data);
6488 		mutex_unlock(&rdev->grbm_idx_mutex);
6489 
6490 		cik_update_rlc(rdev, tmp);
6491 	}
6492 }
6493 
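/* MC registers that carry both the MGCG (MC_CG_ENABLE) and
 * LS (MC_LS_ENABLE) control bits
 */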
6494 static const u32 mc_cg_registers[] =
6495 {
6496 	MC_HUB_MISC_HUB_CG,
6497 	MC_HUB_MISC_SIP_CG,
6498 	MC_HUB_MISC_VM_CG,
6499 	MC_XPB_CLK_GAT,
6500 	ATC_MISC_CG,
6501 	MC_CITF_MISC_WR_CG,
6502 	MC_CITF_MISC_RD_CG,
6503 	MC_CITF_MISC_VM_CG,
6504 	VM_L2_CG,
6505 };
6506 
6507 static void cik_enable_mc_ls(struct radeon_device *rdev,
6508 			     bool enable)
6509 {
6510 	int i;
6511 	u32 orig, data;
6512 
6513 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6514 		orig = data = RREG32(mc_cg_registers[i]);
6515 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6516 			data |= MC_LS_ENABLE;
6517 		else
6518 			data &= ~MC_LS_ENABLE;
6519 		if (data != orig)
6520 			WREG32(mc_cg_registers[i], data);
6521 	}
6522 }
6523 
6524 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6525 			       bool enable)
6526 {
6527 	int i;
6528 	u32 orig, data;
6529 
6530 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6531 		orig = data = RREG32(mc_cg_registers[i]);
6532 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6533 			data |= MC_CG_ENABLE;
6534 		else
6535 			data &= ~MC_CG_ENABLE;
6536 		if (data != orig)
6537 			WREG32(mc_cg_registers[i], data);
6538 	}
6539 }
6540 
6541 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6542 				 bool enable)
6543 {
6544 	u32 orig, data;
6545 
6546 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6547 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6548 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6549 	} else {
6550 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6551 		data |= 0xff000000;
6552 		if (data != orig)
6553 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6554 
6555 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6556 		data |= 0xff000000;
6557 		if (data != orig)
6558 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6559 	}
6560 }
6561 
6562 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6563 				 bool enable)
6564 {
6565 	u32 orig, data;
6566 
6567 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6568 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6569 		data |= 0x100;
6570 		if (orig != data)
6571 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6572 
6573 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6574 		data |= 0x100;
6575 		if (orig != data)
6576 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6577 	} else {
6578 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6579 		data &= ~0x100;
6580 		if (orig != data)
6581 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6582 
6583 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6584 		data &= ~0x100;
6585 		if (orig != data)
6586 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6587 	}
6588 }
6589 
6590 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6591 				bool enable)
6592 {
6593 	u32 orig, data;
6594 
6595 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6596 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6597 		data = 0xfff;
6598 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6599 
6600 		orig = data = RREG32(UVD_CGC_CTRL);
6601 		data |= DCM;
6602 		if (orig != data)
6603 			WREG32(UVD_CGC_CTRL, data);
6604 	} else {
6605 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6606 		data &= ~0xfff;
6607 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6608 
6609 		orig = data = RREG32(UVD_CGC_CTRL);
6610 		data &= ~DCM;
6611 		if (orig != data)
6612 			WREG32(UVD_CGC_CTRL, data);
6613 	}
6614 }
6615 
6616 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6617 			       bool enable)
6618 {
6619 	u32 orig, data;
6620 
6621 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6622 
6623 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6624 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6625 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6626 	else
6627 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6628 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6629 
6630 	if (orig != data)
6631 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6632 }
6633 
6634 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6635 				bool enable)
6636 {
6637 	u32 orig, data;
6638 
6639 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6640 
6641 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6642 		data &= ~CLOCK_GATING_DIS;
6643 	else
6644 		data |= CLOCK_GATING_DIS;
6645 
6646 	if (orig != data)
6647 		WREG32(HDP_HOST_PATH_CNTL, data);
6648 }
6649 
6650 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6651 			      bool enable)
6652 {
6653 	u32 orig, data;
6654 
6655 	orig = data = RREG32(HDP_MEM_POWER_LS);
6656 
6657 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6658 		data |= HDP_LS_ENABLE;
6659 	else
6660 		data &= ~HDP_LS_ENABLE;
6661 
6662 	if (orig != data)
6663 		WREG32(HDP_MEM_POWER_LS, data);
6664 }
6665 
6666 void cik_update_cg(struct radeon_device *rdev,
6667 		   u32 block, bool enable)
6668 {
6670 	if (block & RADEON_CG_BLOCK_GFX) {
6671 		cik_enable_gui_idle_interrupt(rdev, false);
6672 		/* order matters! */
6673 		if (enable) {
6674 			cik_enable_mgcg(rdev, true);
6675 			cik_enable_cgcg(rdev, true);
6676 		} else {
6677 			cik_enable_cgcg(rdev, false);
6678 			cik_enable_mgcg(rdev, false);
6679 		}
6680 		cik_enable_gui_idle_interrupt(rdev, true);
6681 	}
6682 
6683 	if (block & RADEON_CG_BLOCK_MC) {
6684 		if (!(rdev->flags & RADEON_IS_IGP)) {
6685 			cik_enable_mc_mgcg(rdev, enable);
6686 			cik_enable_mc_ls(rdev, enable);
6687 		}
6688 	}
6689 
6690 	if (block & RADEON_CG_BLOCK_SDMA) {
6691 		cik_enable_sdma_mgcg(rdev, enable);
6692 		cik_enable_sdma_mgls(rdev, enable);
6693 	}
6694 
6695 	if (block & RADEON_CG_BLOCK_BIF) {
6696 		cik_enable_bif_mgls(rdev, enable);
6697 	}
6698 
6699 	if (block & RADEON_CG_BLOCK_UVD) {
6700 		if (rdev->has_uvd)
6701 			cik_enable_uvd_mgcg(rdev, enable);
6702 	}
6703 
6704 	if (block & RADEON_CG_BLOCK_HDP) {
6705 		cik_enable_hdp_mgcg(rdev, enable);
6706 		cik_enable_hdp_ls(rdev, enable);
6707 	}
6708 
6709 	if (block & RADEON_CG_BLOCK_VCE) {
6710 		vce_v2_0_enable_mgcg(rdev, enable);
6711 	}
6712 }
6713 
6714 static void cik_init_cg(struct radeon_device *rdev)
6715 {
6717 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6718 
6719 	if (rdev->has_uvd)
6720 		si_init_uvd_internal_cg(rdev);
6721 
6722 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6723 			     RADEON_CG_BLOCK_SDMA |
6724 			     RADEON_CG_BLOCK_BIF |
6725 			     RADEON_CG_BLOCK_UVD |
6726 			     RADEON_CG_BLOCK_HDP), true);
6727 }
6728 
6729 static void cik_fini_cg(struct radeon_device *rdev)
6730 {
6731 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6732 			     RADEON_CG_BLOCK_SDMA |
6733 			     RADEON_CG_BLOCK_BIF |
6734 			     RADEON_CG_BLOCK_UVD |
6735 			     RADEON_CG_BLOCK_HDP), false);
6736 
6737 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6738 }
6739 
6740 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6741 					  bool enable)
6742 {
6743 	u32 data, orig;
6744 
6745 	orig = data = RREG32(RLC_PG_CNTL);
6746 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6747 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6748 	else
6749 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6750 	if (orig != data)
6751 		WREG32(RLC_PG_CNTL, data);
6752 }
6753 
6754 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6755 					  bool enable)
6756 {
6757 	u32 data, orig;
6758 
6759 	orig = data = RREG32(RLC_PG_CNTL);
6760 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6761 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6762 	else
6763 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6764 	if (orig != data)
6765 		WREG32(RLC_PG_CNTL, data);
6766 }
6767 
6768 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6769 {
6770 	u32 data, orig;
6771 
6772 	orig = data = RREG32(RLC_PG_CNTL);
6773 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6774 		data &= ~DISABLE_CP_PG;
6775 	else
6776 		data |= DISABLE_CP_PG;
6777 	if (orig != data)
6778 		WREG32(RLC_PG_CNTL, data);
6779 }
6780 
6781 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6782 {
6783 	u32 data, orig;
6784 
6785 	orig = data = RREG32(RLC_PG_CNTL);
6786 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6787 		data &= ~DISABLE_GDS_PG;
6788 	else
6789 		data |= DISABLE_GDS_PG;
6790 	if (orig != data)
6791 		WREG32(RLC_PG_CNTL, data);
6792 }
6793 
6794 #define CP_ME_TABLE_SIZE    96
6795 #define CP_ME_TABLE_OFFSET  2048
6796 #define CP_MEC_TABLE_OFFSET 4096
6797 
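/*
 * The RLC restores the CP jump tables from this buffer when powering
 * the CP back up.  With new-style firmware (rdev->new_fw) each image
 * describes its own jump table via gfx_firmware_header_v1_0 (jt_offset
 * and jt_size, in dwords); legacy images use the fixed offsets and size
 * defined above.  A sketch of the new-style lookup for one image:
 *
 *	const struct gfx_firmware_header_v1_0 *hdr =
 *		(const struct gfx_firmware_header_v1_0 *)fw->data;
 *	const __le32 *fw_data = (const __le32 *)
 *		(fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 *	u32 jt_offset = le32_to_cpu(hdr->jt_offset);
 *	u32 jt_size = le32_to_cpu(hdr->jt_size);
 *
 * after which jt_size dwords starting at fw_data[jt_offset] are copied
 * into the table buffer.
 */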
6798 void cik_init_cp_pg_table(struct radeon_device *rdev)
6799 {
6800 	volatile u32 *dst_ptr;
6801 	int me, i, max_me = 4;
6802 	u32 bo_offset = 0;
6803 	u32 table_offset, table_size;
6804 
6805 	if (rdev->family == CHIP_KAVERI)
6806 		max_me = 5;
6807 
6808 	if (rdev->rlc.cp_table_ptr == NULL)
6809 		return;
6810 
6811 	/* write the cp table buffer */
6812 	dst_ptr = rdev->rlc.cp_table_ptr;
6813 	for (me = 0; me < max_me; me++) {
6814 		if (rdev->new_fw) {
6815 			const __le32 *fw_data;
6816 			const struct gfx_firmware_header_v1_0 *hdr;
6817 
6818 			if (me == 0) {
6819 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6820 				fw_data = (const __le32 *)
6821 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6822 				table_offset = le32_to_cpu(hdr->jt_offset);
6823 				table_size = le32_to_cpu(hdr->jt_size);
6824 			} else if (me == 1) {
6825 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6826 				fw_data = (const __le32 *)
6827 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6828 				table_offset = le32_to_cpu(hdr->jt_offset);
6829 				table_size = le32_to_cpu(hdr->jt_size);
6830 			} else if (me == 2) {
6831 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6832 				fw_data = (const __le32 *)
6833 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6834 				table_offset = le32_to_cpu(hdr->jt_offset);
6835 				table_size = le32_to_cpu(hdr->jt_size);
6836 			} else if (me == 3) {
6837 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6838 				fw_data = (const __le32 *)
6839 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6840 				table_offset = le32_to_cpu(hdr->jt_offset);
6841 				table_size = le32_to_cpu(hdr->jt_size);
6842 			} else {
6843 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6844 				fw_data = (const __le32 *)
6845 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6846 				table_offset = le32_to_cpu(hdr->jt_offset);
6847 				table_size = le32_to_cpu(hdr->jt_size);
6848 			}
6849 
6850 			for (i = 0; i < table_size; i++) {
6851 				dst_ptr[bo_offset + i] =
6852 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6853 			}
6854 			bo_offset += table_size;
6855 		} else {
6856 			const __be32 *fw_data;
6857 			table_size = CP_ME_TABLE_SIZE;
6858 
6859 			if (me == 0) {
6860 				fw_data = (const __be32 *)rdev->ce_fw->data;
6861 				table_offset = CP_ME_TABLE_OFFSET;
6862 			} else if (me == 1) {
6863 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6864 				table_offset = CP_ME_TABLE_OFFSET;
6865 			} else if (me == 2) {
6866 				fw_data = (const __be32 *)rdev->me_fw->data;
6867 				table_offset = CP_ME_TABLE_OFFSET;
6868 			} else {
6869 				fw_data = (const __be32 *)rdev->mec_fw->data;
6870 				table_offset = CP_MEC_TABLE_OFFSET;
6871 			}
6872 
6873 			for (i = 0; i < table_size; i++) {
6874 				dst_ptr[bo_offset + i] =
6875 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6876 			}
6877 			bo_offset += table_size;
6878 		}
6879 	}
6880 }
6881 
6882 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6883 				bool enable)
6884 {
6885 	u32 data, orig;
6886 
6887 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6888 		orig = data = RREG32(RLC_PG_CNTL);
6889 		data |= GFX_PG_ENABLE;
6890 		if (orig != data)
6891 			WREG32(RLC_PG_CNTL, data);
6892 
6893 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6894 		data |= AUTO_PG_EN;
6895 		if (orig != data)
6896 			WREG32(RLC_AUTO_PG_CTRL, data);
6897 	} else {
6898 		orig = data = RREG32(RLC_PG_CNTL);
6899 		data &= ~GFX_PG_ENABLE;
6900 		if (orig != data)
6901 			WREG32(RLC_PG_CNTL, data);
6902 
6903 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6904 		data &= ~AUTO_PG_EN;
6905 		if (orig != data)
6906 			WREG32(RLC_AUTO_PG_CTRL, data);
6907 
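		/* naked readback; presumably a posting read to flush the disables above */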
6908 		data = RREG32(DB_RENDER_CONTROL);
6909 	}
6910 }
6911 
6912 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6913 {
6914 	u32 mask = 0, tmp, tmp1;
6915 	int i;
6916 
6917 	mutex_lock(&rdev->grbm_idx_mutex);
6918 	cik_select_se_sh(rdev, se, sh);
6919 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6920 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6921 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6922 	mutex_unlock(&rdev->grbm_idx_mutex);
6923 
6924 	tmp &= 0xffff0000;
6925 
6926 	tmp |= tmp1;
6927 	tmp >>= 16;
6928 
6929 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6930 		mask <<= 1;
6931 		mask |= 1;
6932 	}
6933 
6934 	return (~tmp) & mask;
6935 }
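
/*
 * The mask loop above simply computes mask = (1 << max_cu_per_sh) - 1.
 * The bits gathered into tmp flag *disabled* CUs, so (~tmp) & mask is
 * the active-CU bitmap: e.g. with 8 CUs per SH and CUs 0-1 fused off
 * (tmp = 0x03), this returns 0xfc.
 */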
6936 
6937 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6938 {
6939 	u32 i, j, k, active_cu_number = 0;
6940 	u32 mask, counter, cu_bitmap;
6941 	u32 tmp = 0;
6942 
6943 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6944 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6945 			mask = 1;
6946 			cu_bitmap = 0;
6947 			counter = 0;
6948 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6949 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6950 					if (counter < 2)
6951 						cu_bitmap |= mask;
6952 					counter++;
6953 				}
6954 				mask <<= 1;
6955 			}
6956 
6957 			active_cu_number += counter;
6958 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6959 		}
6960 	}
6961 
6962 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6963 
6964 	tmp = RREG32(RLC_MAX_PG_CU);
6965 	tmp &= ~MAX_PU_CU_MASK;
6966 	tmp |= MAX_PU_CU(active_cu_number);
6967 	WREG32(RLC_MAX_PG_CU, tmp);
6968 }
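
/*
 * RLC_PG_AO_CU_MASK packs one byte per SH: the bitmap for SE i / SH j
 * lands at bit (i * 16 + j * 8), and at most the first two active CUs
 * of each SH are marked always-on, while the full active count
 * programs RLC_MAX_PG_CU.  E.g. two SEs with one SH each and CUs 0-1
 * active in both gives tmp = 0x00030003.
 */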
6969 
6970 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6971 				       bool enable)
6972 {
6973 	u32 data, orig;
6974 
6975 	orig = data = RREG32(RLC_PG_CNTL);
6976 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6977 		data |= STATIC_PER_CU_PG_ENABLE;
6978 	else
6979 		data &= ~STATIC_PER_CU_PG_ENABLE;
6980 	if (orig != data)
6981 		WREG32(RLC_PG_CNTL, data);
6982 }
6983 
6984 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6985 					bool enable)
6986 {
6987 	u32 data, orig;
6988 
6989 	orig = data = RREG32(RLC_PG_CNTL);
6990 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6991 		data |= DYN_PER_CU_PG_ENABLE;
6992 	else
6993 		data &= ~DYN_PER_CU_PG_ENABLE;
6994 	if (orig != data)
6995 		WREG32(RLC_PG_CNTL, data);
6996 }
6997 
6998 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6999 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7000 
7001 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
7002 {
7003 	u32 data, orig;
7004 	u32 i;
7005 
7006 	if (rdev->rlc.cs_data) {
7007 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7008 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
7009 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
7010 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
7011 	} else {
7012 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7013 		for (i = 0; i < 3; i++)
7014 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
7015 	}
7016 	if (rdev->rlc.reg_list) {
7017 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
7018 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
7019 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
7020 	}
7021 
7022 	orig = data = RREG32(RLC_PG_CNTL);
7023 	data |= GFX_PG_SRC;
7024 	if (orig != data)
7025 		WREG32(RLC_PG_CNTL, data);
7026 
7027 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
7028 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
7029 
7030 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
7031 	data &= ~IDLE_POLL_COUNT_MASK;
7032 	data |= IDLE_POLL_COUNT(0x60);
7033 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
7034 
7035 	data = 0x10101010;
7036 	WREG32(RLC_PG_DELAY, data);
7037 
7038 	data = RREG32(RLC_PG_DELAY_2);
7039 	data &= ~0xff;
7040 	data |= 0x3;
7041 	WREG32(RLC_PG_DELAY_2, data);
7042 
7043 	data = RREG32(RLC_AUTO_PG_CTRL);
7044 	data &= ~GRBM_REG_SGIT_MASK;
7045 	data |= GRBM_REG_SGIT(0x700);
7046 	WREG32(RLC_AUTO_PG_CTRL, data);
7047 
7048 }
7049 
7050 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7051 {
7052 	cik_enable_gfx_cgpg(rdev, enable);
7053 	cik_enable_gfx_static_mgpg(rdev, enable);
7054 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
7055 }
7056 
7057 u32 cik_get_csb_size(struct radeon_device *rdev)
7058 {
7059 	u32 count = 0;
7060 	const struct cs_section_def *sect = NULL;
7061 	const struct cs_extent_def *ext = NULL;
7062 
7063 	if (rdev->rlc.cs_data == NULL)
7064 		return 0;
7065 
7066 	/* begin clear state */
7067 	count += 2;
7068 	/* context control state */
7069 	count += 3;
7070 
7071 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7072 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7073 			if (sect->id == SECT_CONTEXT)
7074 				count += 2 + ext->reg_count;
7075 			else
7076 				return 0;
7077 		}
7078 	}
7079 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7080 	count += 4;
7081 	/* end clear state */
7082 	count += 2;
7083 	/* clear state */
7084 	count += 2;
7085 
7086 	return count;
7087 }
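
/*
 * The count above mirrors, packet for packet, what cik_get_csb_buffer()
 * emits below: 2 dwords of PREAMBLE_CNTL (begin clear state), 3 of
 * CONTEXT_CONTROL, (2 + reg_count) per SECT_CONTEXT extent, 4 for the
 * two PA_SC_RASTER_CONFIG registers, 2 of PREAMBLE_CNTL (end clear
 * state) and 2 of CLEAR_STATE.  Keeping the two routines in sync is
 * what makes the clear-state buffer allocation exact.
 */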
7088 
7089 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7090 {
7091 	u32 count = 0, i;
7092 	const struct cs_section_def *sect = NULL;
7093 	const struct cs_extent_def *ext = NULL;
7094 
7095 	if (rdev->rlc.cs_data == NULL)
7096 		return;
7097 	if (buffer == NULL)
7098 		return;
7099 
7100 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7101 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7102 
7103 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7104 	buffer[count++] = cpu_to_le32(0x80000000);
7105 	buffer[count++] = cpu_to_le32(0x80000000);
7106 
7107 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7108 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7109 			if (sect->id == SECT_CONTEXT) {
7110 				buffer[count++] =
7111 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7112 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7113 				for (i = 0; i < ext->reg_count; i++)
7114 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7115 			} else {
7116 				return;
7117 			}
7118 		}
7119 	}
7120 
7121 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7122 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7123 	switch (rdev->family) {
7124 	case CHIP_BONAIRE:
7125 		buffer[count++] = cpu_to_le32(0x16000012);
7126 		buffer[count++] = cpu_to_le32(0x00000000);
7127 		break;
7128 	case CHIP_KAVERI:
7129 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7130 		buffer[count++] = cpu_to_le32(0x00000000);
7131 		break;
7132 	case CHIP_KABINI:
7133 	case CHIP_MULLINS:
7134 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7135 		buffer[count++] = cpu_to_le32(0x00000000);
7136 		break;
7137 	case CHIP_HAWAII:
7138 		buffer[count++] = cpu_to_le32(0x3a00161a);
7139 		buffer[count++] = cpu_to_le32(0x0000002e);
7140 		break;
7141 	default:
7142 		buffer[count++] = cpu_to_le32(0x00000000);
7143 		buffer[count++] = cpu_to_le32(0x00000000);
7144 		break;
7145 	}
7146 
7147 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7148 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7149 
7150 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7151 	buffer[count++] = cpu_to_le32(0);
7152 }
7153 
7154 static void cik_init_pg(struct radeon_device *rdev)
7155 {
7156 	if (rdev->pg_flags) {
7157 		cik_enable_sck_slowdown_on_pu(rdev, true);
7158 		cik_enable_sck_slowdown_on_pd(rdev, true);
7159 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7160 			cik_init_gfx_cgpg(rdev);
7161 			cik_enable_cp_pg(rdev, true);
7162 			cik_enable_gds_pg(rdev, true);
7163 		}
7164 		cik_init_ao_cu_mask(rdev);
7165 		cik_update_gfx_pg(rdev, true);
7166 	}
7167 }
7168 
7169 static void cik_fini_pg(struct radeon_device *rdev)
7170 {
7171 	if (rdev->pg_flags) {
7172 		cik_update_gfx_pg(rdev, false);
7173 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7174 			cik_enable_cp_pg(rdev, false);
7175 			cik_enable_gds_pg(rdev, false);
7176 		}
7177 	}
7178 }
7179 
7180 /*
7181  * Interrupts
7182  * Starting with r6xx, interrupts are handled via a ring buffer.
7183  * Ring buffers are areas of GPU accessible memory that the GPU
7184  * writes interrupt vectors into and the host reads vectors out of.
7185  * There is a rptr (read pointer) that determines where the
7186  * host is currently reading, and a wptr (write pointer)
7187  * which determines where the GPU has written.  When the
7188  * pointers are equal, the ring is idle.  When the GPU
7189  * writes vectors to the ring buffer, it increments the
7190  * wptr.  When there is an interrupt, the host then starts
7191  * fetching vectors and processing them until the pointers are
7192  * equal again at which point it updates the rptr.
7193  */
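
/*
 * A minimal sketch of that host-side drain, as implemented by
 * cik_irq_process() below (handle_one_vector() is a hypothetical
 * stand-in for the big src_id switch there):
 *
 *	rptr = rdev->ih.rptr;
 *	wptr = cik_get_ih_wptr(rdev);
 *	while (rptr != wptr) {
 *		handle_one_vector(rdev, &rdev->ih.ring[rptr / 4]);
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 *	rdev->ih.rptr = rptr;
 *
 * Each vector is 16 bytes, hence the stride, and the final IH_RB_RPTR
 * write hands the consumed slots back to the GPU.
 */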
7194 
7195 /**
7196  * cik_enable_interrupts - Enable the interrupt ring buffer
7197  *
7198  * @rdev: radeon_device pointer
7199  *
7200  * Enable the interrupt ring buffer (CIK).
7201  */
7202 static void cik_enable_interrupts(struct radeon_device *rdev)
7203 {
7204 	u32 ih_cntl = RREG32(IH_CNTL);
7205 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7206 
7207 	ih_cntl |= ENABLE_INTR;
7208 	ih_rb_cntl |= IH_RB_ENABLE;
7209 	WREG32(IH_CNTL, ih_cntl);
7210 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7211 	rdev->ih.enabled = true;
7212 }
7213 
7214 /**
7215  * cik_disable_interrupts - Disable the interrupt ring buffer
7216  *
7217  * @rdev: radeon_device pointer
7218  *
7219  * Disable the interrupt ring buffer (CIK).
7220  */
7221 static void cik_disable_interrupts(struct radeon_device *rdev)
7222 {
7223 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7224 	u32 ih_cntl = RREG32(IH_CNTL);
7225 
7226 	ih_rb_cntl &= ~IH_RB_ENABLE;
7227 	ih_cntl &= ~ENABLE_INTR;
7228 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7229 	WREG32(IH_CNTL, ih_cntl);
7230 	/* set rptr, wptr to 0 */
7231 	WREG32(IH_RB_RPTR, 0);
7232 	WREG32(IH_RB_WPTR, 0);
7233 	rdev->ih.enabled = false;
7234 	rdev->ih.rptr = 0;
7235 }
7236 
7237 /**
7238  * cik_disable_interrupt_state - Disable all interrupt sources
7239  *
7240  * @rdev: radeon_device pointer
7241  *
7242  * Clear all interrupt enable bits used by the driver (CIK).
7243  */
7244 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7245 {
7246 	u32 tmp;
7247 
7248 	/* gfx ring */
7249 	tmp = RREG32(CP_INT_CNTL_RING0) &
7250 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7251 	WREG32(CP_INT_CNTL_RING0, tmp);
7252 	/* sdma */
7253 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7254 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7255 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7256 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7257 	/* compute queues */
7258 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7259 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7260 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7261 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7262 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7263 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7264 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7265 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7266 	/* grbm */
7267 	WREG32(GRBM_INT_CNTL, 0);
7268 	/* SRBM */
7269 	WREG32(SRBM_INT_CNTL, 0);
7270 	/* vline/vblank, etc. */
7271 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7272 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7273 	if (rdev->num_crtc >= 4) {
7274 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7275 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7276 	}
7277 	if (rdev->num_crtc >= 6) {
7278 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7279 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7280 	}
7281 	/* pflip */
7282 	if (rdev->num_crtc >= 2) {
7283 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7284 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7285 	}
7286 	if (rdev->num_crtc >= 4) {
7287 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7288 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7289 	}
7290 	if (rdev->num_crtc >= 6) {
7291 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7292 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7293 	}
7294 
7295 	/* dac hotplug */
7296 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7297 
7298 	/* digital hotplug */
7299 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7300 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7301 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7302 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7303 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7304 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7305 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7306 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7307 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7308 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7309 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7310 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7311 
7312 }
7313 
7314 /**
7315  * cik_irq_init - init and enable the interrupt ring
7316  *
7317  * @rdev: radeon_device pointer
7318  *
7319  * Allocate a ring buffer for the interrupt controller,
7320  * enable the RLC, disable interrupts, enable the IH
7321  * ring buffer and enable it (CIK).
7322  * Called at device load and resume.
7323  * Returns 0 for success, errors for failure.
7324  */
7325 static int cik_irq_init(struct radeon_device *rdev)
7326 {
7327 	int ret = 0;
7328 	int rb_bufsz;
7329 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7330 
7331 	/* allocate ring */
7332 	ret = r600_ih_ring_alloc(rdev);
7333 	if (ret)
7334 		return ret;
7335 
7336 	/* disable irqs */
7337 	cik_disable_interrupts(rdev);
7338 
7339 	/* init rlc */
7340 	ret = cik_rlc_resume(rdev);
7341 	if (ret) {
7342 		r600_ih_ring_fini(rdev);
7343 		return ret;
7344 	}
7345 
7346 	/* setup interrupt control */
7347 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7348 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7349 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7350 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7351 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7352 	 */
7353 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7354 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7355 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7356 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7357 
7358 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7359 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7360 
7361 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7362 		      IH_WPTR_OVERFLOW_CLEAR |
7363 		      (rb_bufsz << 1));
7364 
7365 	if (rdev->wb.enabled)
7366 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7367 
7368 	/* set the writeback address whether it's enabled or not */
7369 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7370 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7371 
7372 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7373 
7374 	/* set rptr, wptr to 0 */
7375 	WREG32(IH_RB_RPTR, 0);
7376 	WREG32(IH_RB_WPTR, 0);
7377 
7378 	/* Default settings for IH_CNTL (disabled at first) */
7379 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7380 	/* RPTR_REARM only works if msi's are enabled */
7381 	if (rdev->msi_enabled)
7382 		ih_cntl |= RPTR_REARM;
7383 	WREG32(IH_CNTL, ih_cntl);
7384 
7385 	/* force the active interrupt state to all disabled */
7386 	cik_disable_interrupt_state(rdev);
7387 
7388 	pci_set_master(rdev->pdev);
7389 
7390 	/* enable irqs */
7391 	cik_enable_interrupts(rdev);
7392 
7393 	return ret;
7394 }
7395 
7396 /**
7397  * cik_irq_set - enable/disable interrupt sources
7398  *
7399  * @rdev: radeon_device pointer
7400  *
7401  * Enable interrupt sources on the GPU (vblanks, hpd,
7402  * etc.) (CIK).
7403  * Returns 0 for success, errors for failure.
7404  */
7405 int cik_irq_set(struct radeon_device *rdev)
7406 {
7407 	u32 cp_int_cntl;
7408 	u32 cp_m1p0;
7409 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7410 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7411 	u32 grbm_int_cntl = 0;
7412 	u32 dma_cntl, dma_cntl1;
7413 
7414 	if (!rdev->irq.installed) {
7415 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7416 		return -EINVAL;
7417 	}
7418 	/* don't enable anything if the ih is disabled */
7419 	if (!rdev->ih.enabled) {
7420 		cik_disable_interrupts(rdev);
7421 		/* force the active interrupt state to all disabled */
7422 		cik_disable_interrupt_state(rdev);
7423 		return 0;
7424 	}
7425 
7426 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7427 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7428 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7429 
7430 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7431 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7432 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7433 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7434 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7435 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7436 
7437 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7438 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7439 
7440 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7441 
7442 	/* enable CP interrupts on all rings */
7443 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7444 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7445 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7446 	}
7447 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7448 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7449 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7450 		if (ring->me == 1) {
7451 			switch (ring->pipe) {
7452 			case 0:
7453 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7454 				break;
7455 			default:
7456 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7457 				break;
7458 			}
7459 		} else {
7460 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7461 		}
7462 	}
7463 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7464 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7465 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7466 		if (ring->me == 1) {
7467 			switch (ring->pipe) {
7468 			case 0:
7469 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7470 				break;
7471 			default:
7472 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7473 				break;
7474 			}
7475 		} else {
7476 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7477 		}
7478 	}
7479 
7480 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7481 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7482 		dma_cntl |= TRAP_ENABLE;
7483 	}
7484 
7485 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7486 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7487 		dma_cntl1 |= TRAP_ENABLE;
7488 	}
7489 
7490 	if (rdev->irq.crtc_vblank_int[0] ||
7491 	    atomic_read(&rdev->irq.pflip[0])) {
7492 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7493 		crtc1 |= VBLANK_INTERRUPT_MASK;
7494 	}
7495 	if (rdev->irq.crtc_vblank_int[1] ||
7496 	    atomic_read(&rdev->irq.pflip[1])) {
7497 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7498 		crtc2 |= VBLANK_INTERRUPT_MASK;
7499 	}
7500 	if (rdev->irq.crtc_vblank_int[2] ||
7501 	    atomic_read(&rdev->irq.pflip[2])) {
7502 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7503 		crtc3 |= VBLANK_INTERRUPT_MASK;
7504 	}
7505 	if (rdev->irq.crtc_vblank_int[3] ||
7506 	    atomic_read(&rdev->irq.pflip[3])) {
7507 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7508 		crtc4 |= VBLANK_INTERRUPT_MASK;
7509 	}
7510 	if (rdev->irq.crtc_vblank_int[4] ||
7511 	    atomic_read(&rdev->irq.pflip[4])) {
7512 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7513 		crtc5 |= VBLANK_INTERRUPT_MASK;
7514 	}
7515 	if (rdev->irq.crtc_vblank_int[5] ||
7516 	    atomic_read(&rdev->irq.pflip[5])) {
7517 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7518 		crtc6 |= VBLANK_INTERRUPT_MASK;
7519 	}
7520 	if (rdev->irq.hpd[0]) {
7521 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7522 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7523 	}
7524 	if (rdev->irq.hpd[1]) {
7525 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7526 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7527 	}
7528 	if (rdev->irq.hpd[2]) {
7529 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7530 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7531 	}
7532 	if (rdev->irq.hpd[3]) {
7533 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7534 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7535 	}
7536 	if (rdev->irq.hpd[4]) {
7537 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7538 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7539 	}
7540 	if (rdev->irq.hpd[5]) {
7541 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7542 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7543 	}
7544 
7545 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7546 
7547 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7548 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7549 
7550 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7551 
7552 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7553 
7554 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7555 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7556 	if (rdev->num_crtc >= 4) {
7557 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7558 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7559 	}
7560 	if (rdev->num_crtc >= 6) {
7561 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7562 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7563 	}
7564 
7565 	if (rdev->num_crtc >= 2) {
7566 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7567 		       GRPH_PFLIP_INT_MASK);
7568 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7569 		       GRPH_PFLIP_INT_MASK);
7570 	}
7571 	if (rdev->num_crtc >= 4) {
7572 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7573 		       GRPH_PFLIP_INT_MASK);
7574 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7575 		       GRPH_PFLIP_INT_MASK);
7576 	}
7577 	if (rdev->num_crtc >= 6) {
7578 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7579 		       GRPH_PFLIP_INT_MASK);
7580 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7581 		       GRPH_PFLIP_INT_MASK);
7582 	}
7583 
7584 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7585 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7586 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7587 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7588 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7589 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7590 
7591 	/* posting read */
7592 	RREG32(SRBM_STATUS);
7593 
7594 	return 0;
7595 }
7596 
7597 /**
7598  * cik_irq_ack - ack interrupt sources
7599  *
7600  * @rdev: radeon_device pointer
7601  *
7602  * Ack interrupt sources on the GPU (vblanks, hpd,
7603  * etc.) (CIK).  Certain interrupt sources are sw
7604  * generated and do not require an explicit ack.
7605  */
7606 static inline void cik_irq_ack(struct radeon_device *rdev)
7607 {
7608 	u32 tmp;
7609 
7610 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7611 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7612 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7613 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7614 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7615 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7616 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7617 
7618 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7619 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7620 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7621 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7622 	if (rdev->num_crtc >= 4) {
7623 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7624 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7625 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7626 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7627 	}
7628 	if (rdev->num_crtc >= 6) {
7629 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7630 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7631 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7632 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7633 	}
7634 
7635 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7636 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7637 		       GRPH_PFLIP_INT_CLEAR);
7638 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7639 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7640 		       GRPH_PFLIP_INT_CLEAR);
7641 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7642 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7643 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7644 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7645 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7646 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7647 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7648 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7649 
7650 	if (rdev->num_crtc >= 4) {
7651 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7652 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7653 			       GRPH_PFLIP_INT_CLEAR);
7654 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7655 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7656 			       GRPH_PFLIP_INT_CLEAR);
7657 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7658 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7659 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7660 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7661 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7662 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7663 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7664 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7665 	}
7666 
7667 	if (rdev->num_crtc >= 6) {
7668 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7669 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7670 			       GRPH_PFLIP_INT_CLEAR);
7671 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7672 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7673 			       GRPH_PFLIP_INT_CLEAR);
7674 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7675 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7676 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7677 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7678 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7679 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7680 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7681 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7682 	}
7683 
7684 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7685 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7686 		tmp |= DC_HPDx_INT_ACK;
7687 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7688 	}
7689 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7690 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7691 		tmp |= DC_HPDx_INT_ACK;
7692 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7693 	}
7694 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7695 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7696 		tmp |= DC_HPDx_INT_ACK;
7697 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7698 	}
7699 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7700 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7701 		tmp |= DC_HPDx_INT_ACK;
7702 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7703 	}
7704 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7705 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7706 		tmp |= DC_HPDx_INT_ACK;
7707 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7708 	}
7709 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7710 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7711 		tmp |= DC_HPDx_INT_ACK;
7712 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7713 	}
7714 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7715 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7716 		tmp |= DC_HPDx_RX_INT_ACK;
7717 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7718 	}
7719 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7720 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7721 		tmp |= DC_HPDx_RX_INT_ACK;
7722 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7723 	}
7724 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7725 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7726 		tmp |= DC_HPDx_RX_INT_ACK;
7727 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7728 	}
7729 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7730 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7731 		tmp |= DC_HPDx_RX_INT_ACK;
7732 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7733 	}
7734 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7735 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7736 		tmp |= DC_HPDx_RX_INT_ACK;
7737 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7738 	}
7739 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7740 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7741 		tmp |= DC_HPDx_RX_INT_ACK;
7742 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7743 	}
7744 }
7745 
7746 /**
7747  * cik_irq_disable - disable interrupts
7748  *
7749  * @rdev: radeon_device pointer
7750  *
7751  * Disable interrupts on the hw (CIK).
7752  */
7753 static void cik_irq_disable(struct radeon_device *rdev)
7754 {
7755 	cik_disable_interrupts(rdev);
7756 	/* Wait and acknowledge irq */
7757 	mdelay(1);
7758 	cik_irq_ack(rdev);
7759 	cik_disable_interrupt_state(rdev);
7760 }
7761 
7762 /**
7763  * cik_irq_suspend - disable interrupts for suspend
7764  *
7765  * @rdev: radeon_device pointer
7766  *
7767  * Disable interrupts and stop the RLC (CIK).
7768  * Used for suspend.
7769  */
7770 static void cik_irq_suspend(struct radeon_device *rdev)
7771 {
7772 	cik_irq_disable(rdev);
7773 	cik_rlc_stop(rdev);
7774 }
7775 
7776 /**
7777  * cik_irq_fini - tear down interrupt support
7778  *
7779  * @rdev: radeon_device pointer
7780  *
7781  * Disable interrupts on the hw and free the IH ring
7782  * buffer (CIK).
7783  * Used for driver unload.
7784  */
7785 static void cik_irq_fini(struct radeon_device *rdev)
7786 {
7787 	cik_irq_suspend(rdev);
7788 	r600_ih_ring_fini(rdev);
7789 }
7790 
7791 /**
7792  * cik_get_ih_wptr - get the IH ring buffer wptr
7793  *
7794  * @rdev: radeon_device pointer
7795  *
7796  * Get the IH ring buffer wptr from either the register
7797  * or the writeback memory buffer (CIK).  Also check for
7798  * ring buffer overflow and deal with it.
7799  * Used by cik_irq_process().
7800  * Returns the value of the wptr.
7801  */
7802 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7803 {
7804 	u32 wptr, tmp;
7805 
7806 	if (rdev->wb.enabled)
7807 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7808 	else
7809 		wptr = RREG32(IH_RB_WPTR);
7810 
7811 	if (wptr & RB_OVERFLOW) {
7812 		wptr &= ~RB_OVERFLOW;
7813 		/* When a ring buffer overflow happens, start parsing interrupts
7814 		 * from the last not-overwritten vector (wptr + 16). Hopefully
7815 		 * this should allow us to catch up.
7816 		 */
7817 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7818 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7819 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7820 		tmp = RREG32(IH_RB_CNTL);
7821 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7822 		WREG32(IH_RB_CNTL, tmp);
7823 	}
7824 	return (wptr & rdev->ih.ptr_mask);
7825 }
7826 
7827 /* CIK IV Ring
7828  * Each IV ring entry is 128 bits:
7829  * [7:0]    - interrupt source id
7830  * [31:8]   - reserved
7831  * [59:32]  - interrupt source data
7832  * [63:60]  - reserved
7833  * [71:64]  - RINGID
7834  *            CP:
7835  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7836  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7837  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7838  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7839  *            PIPE_ID - ME0 0=3D
7840  *                    - ME1&2 compute dispatcher (4 pipes each)
7841  *            SDMA:
7842  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7843  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7844  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7845  * [79:72]  - VMID
7846  * [95:80]  - PASID
7847  * [127:96] - reserved
7848  */
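/*
 * Decoding one entry (four little-endian dwords) per the layout above,
 * as done at the top of the processing loop in cik_irq_process():
 *
 *	src_id   = le32_to_cpu(ring[i + 0]) & 0xff;
 *	src_data = le32_to_cpu(ring[i + 1]) & 0xfffffff;
 *	ring_id  = le32_to_cpu(ring[i + 2]) & 0xff;
 *	vmid     = (le32_to_cpu(ring[i + 2]) >> 8) & 0xff;
 *	pasid    = le32_to_cpu(ring[i + 2]) >> 16;
 *
 * and for CP sources the RINGID byte splits further as
 * me_id = ring_id[6:5], pipe_id = ring_id[4:3], queue_id = ring_id[2:0].
 */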
7849 /**
7850  * cik_irq_process - interrupt handler
7851  *
7852  * @rdev: radeon_device pointer
7853  *
7854  * Interrupt handler (CIK).  Walk the IH ring,
7855  * ack interrupts and schedule work to handle
7856  * interrupt events.
7857  * Returns irq process return code.
7858  */
7859 int cik_irq_process(struct radeon_device *rdev)
7860 {
7861 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7862 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7863 	u32 wptr;
7864 	u32 rptr;
7865 	u32 src_id, src_data, ring_id;
7866 	u8 me_id, pipe_id, queue_id;
7867 	u32 ring_index;
7868 	bool queue_hotplug = false;
7869 	bool queue_dp = false;
7870 	bool queue_reset = false;
7871 	u32 addr, status, mc_client;
7872 	bool queue_thermal = false;
7873 
7874 	if (!rdev->ih.enabled || rdev->shutdown)
7875 		return IRQ_NONE;
7876 
7877 	wptr = cik_get_ih_wptr(rdev);
7878 
7879 restart_ih:
7880 	/* is somebody else already processing irqs? */
7881 	if (atomic_xchg(&rdev->ih.lock, 1))
7882 		return IRQ_NONE;
7883 
7884 	rptr = rdev->ih.rptr;
7885 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7886 
7887 	/* Order reading of wptr vs. reading of IH ring data */
7888 	rmb();
7889 
7890 	/* display interrupts */
7891 	cik_irq_ack(rdev);
7892 
7893 	while (rptr != wptr) {
7894 		/* wptr/rptr are in bytes! */
7895 		ring_index = rptr / 4;
7896 
7897 		radeon_kfd_interrupt(rdev,
7898 				(const void *) &rdev->ih.ring[ring_index]);
7899 
7900 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7901 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7902 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7903 
7904 		switch (src_id) {
7905 		case 1: /* D1 vblank/vline */
7906 			switch (src_data) {
7907 			case 0: /* D1 vblank */
7908 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7909 					if (rdev->irq.crtc_vblank_int[0]) {
7910 						drm_handle_vblank(rdev->ddev, 0);
7911 						rdev->pm.vblank_sync = true;
7912 						wake_up(&rdev->irq.vblank_queue);
7913 					}
7914 					if (atomic_read(&rdev->irq.pflip[0]))
7915 						radeon_crtc_handle_vblank(rdev, 0);
7916 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7917 					DRM_DEBUG("IH: D1 vblank\n");
7918 				}
7919 				break;
7920 			case 1: /* D1 vline */
7921 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7922 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7923 					DRM_DEBUG("IH: D1 vline\n");
7924 				}
7925 				break;
7926 			default:
7927 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7928 				break;
7929 			}
7930 			break;
7931 		case 2: /* D2 vblank/vline */
7932 			switch (src_data) {
7933 			case 0: /* D2 vblank */
7934 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7935 					if (rdev->irq.crtc_vblank_int[1]) {
7936 						drm_handle_vblank(rdev->ddev, 1);
7937 						rdev->pm.vblank_sync = true;
7938 						wake_up(&rdev->irq.vblank_queue);
7939 					}
7940 					if (atomic_read(&rdev->irq.pflip[1]))
7941 						radeon_crtc_handle_vblank(rdev, 1);
7942 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7943 					DRM_DEBUG("IH: D2 vblank\n");
7944 				}
7945 				break;
7946 			case 1: /* D2 vline */
7947 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7948 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7949 					DRM_DEBUG("IH: D2 vline\n");
7950 				}
7951 				break;
7952 			default:
7953 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7954 				break;
7955 			}
7956 			break;
7957 		case 3: /* D3 vblank/vline */
7958 			switch (src_data) {
7959 			case 0: /* D3 vblank */
7960 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7961 					if (rdev->irq.crtc_vblank_int[2]) {
7962 						drm_handle_vblank(rdev->ddev, 2);
7963 						rdev->pm.vblank_sync = true;
7964 						wake_up(&rdev->irq.vblank_queue);
7965 					}
7966 					if (atomic_read(&rdev->irq.pflip[2]))
7967 						radeon_crtc_handle_vblank(rdev, 2);
7968 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7969 					DRM_DEBUG("IH: D3 vblank\n");
7970 				}
7971 				break;
7972 			case 1: /* D3 vline */
7973 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7974 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7975 					DRM_DEBUG("IH: D3 vline\n");
7976 				}
7977 				break;
7978 			default:
7979 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7980 				break;
7981 			}
7982 			break;
7983 		case 4: /* D4 vblank/vline */
7984 			switch (src_data) {
7985 			case 0: /* D4 vblank */
7986 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7987 					if (rdev->irq.crtc_vblank_int[3]) {
7988 						drm_handle_vblank(rdev->ddev, 3);
7989 						rdev->pm.vblank_sync = true;
7990 						wake_up(&rdev->irq.vblank_queue);
7991 					}
7992 					if (atomic_read(&rdev->irq.pflip[3]))
7993 						radeon_crtc_handle_vblank(rdev, 3);
7994 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7995 					DRM_DEBUG("IH: D4 vblank\n");
7996 				}
7997 				break;
7998 			case 1: /* D4 vline */
7999 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
8000 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8001 					DRM_DEBUG("IH: D4 vline\n");
8002 				}
8003 				break;
8004 			default:
8005 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8006 				break;
8007 			}
8008 			break;
8009 		case 5: /* D5 vblank/vline */
8010 			switch (src_data) {
8011 			case 0: /* D5 vblank */
8012 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
8013 					if (rdev->irq.crtc_vblank_int[4]) {
8014 						drm_handle_vblank(rdev->ddev, 4);
8015 						rdev->pm.vblank_sync = true;
8016 						wake_up(&rdev->irq.vblank_queue);
8017 					}
8018 					if (atomic_read(&rdev->irq.pflip[4]))
8019 						radeon_crtc_handle_vblank(rdev, 4);
8020 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8021 					DRM_DEBUG("IH: D5 vblank\n");
8022 				}
8023 				break;
8024 			case 1: /* D5 vline */
8025 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
8026 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8027 					DRM_DEBUG("IH: D5 vline\n");
8028 				}
8029 				break;
8030 			default:
8031 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8032 				break;
8033 			}
8034 			break;
8035 		case 6: /* D6 vblank/vline */
8036 			switch (src_data) {
8037 			case 0: /* D6 vblank */
8038 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
8039 					if (rdev->irq.crtc_vblank_int[5]) {
8040 						drm_handle_vblank(rdev->ddev, 5);
8041 						rdev->pm.vblank_sync = true;
8042 						wake_up(&rdev->irq.vblank_queue);
8043 					}
8044 					if (atomic_read(&rdev->irq.pflip[5]))
8045 						radeon_crtc_handle_vblank(rdev, 5);
8046 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8047 					DRM_DEBUG("IH: D6 vblank\n");
8048 				}
8049 				break;
8050 			case 1: /* D6 vline */
8051 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
8052 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8053 					DRM_DEBUG("IH: D6 vline\n");
8054 				}
8055 				break;
8056 			default:
8057 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8058 				break;
8059 			}
8060 			break;
8061 		case 8: /* D1 page flip */
8062 		case 10: /* D2 page flip */
8063 		case 12: /* D3 page flip */
8064 		case 14: /* D4 page flip */
8065 		case 16: /* D5 page flip */
8066 		case 18: /* D6 page flip */
8067 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8068 			if (radeon_use_pflipirq > 0)
8069 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8070 			break;
8071 		case 42: /* HPD hotplug */
8072 			switch (src_data) {
8073 			case 0:
8074 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8075 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8076 					queue_hotplug = true;
8077 					DRM_DEBUG("IH: HPD1\n");
8078 				}
8079 				break;
8080 			case 1:
8081 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8082 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8083 					queue_hotplug = true;
8084 					DRM_DEBUG("IH: HPD2\n");
8085 				}
8086 				break;
8087 			case 2:
8088 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8089 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8090 					queue_hotplug = true;
8091 					DRM_DEBUG("IH: HPD3\n");
8092 				}
8093 				break;
8094 			case 3:
8095 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8096 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8097 					queue_hotplug = true;
8098 					DRM_DEBUG("IH: HPD4\n");
8099 				}
8100 				break;
8101 			case 4:
8102 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8103 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8104 					queue_hotplug = true;
8105 					DRM_DEBUG("IH: HPD5\n");
8106 				}
8107 				break;
8108 			case 5:
8109 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8110 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8111 					queue_hotplug = true;
8112 					DRM_DEBUG("IH: HPD6\n");
8113 				}
8114 				break;
8115 			case 6:
8116 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
8117 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8118 					queue_dp = true;
8119 					DRM_DEBUG("IH: HPD_RX 1\n");
8120 				}
8121 				break;
8122 			case 7:
8123 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
8124 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8125 					queue_dp = true;
8126 					DRM_DEBUG("IH: HPD_RX 2\n");
8127 				}
8128 				break;
8129 			case 8:
8130 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
8131 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8132 					queue_dp = true;
8133 					DRM_DEBUG("IH: HPD_RX 3\n");
8134 				}
8135 				break;
8136 			case 9:
8137 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
8138 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8139 					queue_dp = true;
8140 					DRM_DEBUG("IH: HPD_RX 4\n");
8141 				}
8142 				break;
8143 			case 10:
8144 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
8145 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8146 					queue_dp = true;
8147 					DRM_DEBUG("IH: HPD_RX 5\n");
8148 				}
8149 				break;
8150 			case 11:
8151 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
8152 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8153 					queue_dp = true;
8154 					DRM_DEBUG("IH: HPD_RX 6\n");
8155 				}
8156 				break;
8157 			default:
8158 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8159 				break;
8160 			}
8161 			break;
8162 		case 96:
8163 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8164 			WREG32(SRBM_INT_ACK, 0x1);
8165 			break;
8166 		case 124: /* UVD */
8167 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8168 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8169 			break;
8170 		case 146:
8171 		case 147:
8172 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8173 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8174 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8175 			/* reset addr and status */
8176 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8177 			if (addr == 0x0 && status == 0x0)
8178 				break;
8179 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8180 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8181 				addr);
8182 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8183 				status);
8184 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8185 			break;
8186 		case 167: /* VCE */
8187 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8188 			switch (src_data) {
8189 			case 0:
8190 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8191 				break;
8192 			case 1:
8193 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8194 				break;
8195 			default:
8196 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8197 				break;
8198 			}
8199 			break;
8200 		case 176: /* GFX RB CP_INT */
8201 		case 177: /* GFX IB CP_INT */
8202 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8203 			break;
8204 		case 181: /* CP EOP event */
8205 			DRM_DEBUG("IH: CP EOP\n");
8206 			/* XXX check the bitfield order! */
8207 			me_id = (ring_id & 0x60) >> 5;
8208 			pipe_id = (ring_id & 0x18) >> 3;
8209 			queue_id = (ring_id & 0x7) >> 0;
8210 			switch (me_id) {
8211 			case 0:
8212 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8213 				break;
8214 			case 1:
8215 			case 2:
8216 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8217 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8218 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8219 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8220 				break;
8221 			}
8222 			break;
8223 		case 184: /* CP Privileged reg access */
8224 			DRM_ERROR("Illegal register access in command stream\n");
8225 			/* XXX check the bitfield order! */
8226 			me_id = (ring_id & 0x60) >> 5;
8227 			pipe_id = (ring_id & 0x18) >> 3;
8228 			queue_id = (ring_id & 0x7) >> 0;
8229 			switch (me_id) {
8230 			case 0:
8231 				/* This results in a full GPU reset, but all we need to do is soft
8232 				 * reset the CP for gfx
8233 				 */
8234 				queue_reset = true;
8235 				break;
8236 			case 1:
8237 				/* XXX compute */
8238 				queue_reset = true;
8239 				break;
8240 			case 2:
8241 				/* XXX compute */
8242 				queue_reset = true;
8243 				break;
8244 			}
8245 			break;
8246 		case 185: /* CP Privileged inst */
8247 			DRM_ERROR("Illegal instruction in command stream\n");
8248 			/* XXX check the bitfield order! */
8249 			me_id = (ring_id & 0x60) >> 5;
8250 			pipe_id = (ring_id & 0x18) >> 3;
8251 			queue_id = (ring_id & 0x7) >> 0;
8252 			switch (me_id) {
8253 			case 0:
8254 				/* This results in a full GPU reset, but all we need to do is soft
8255 				 * reset the CP for gfx
8256 				 */
8257 				queue_reset = true;
8258 				break;
8259 			case 1:
8260 				/* XXX compute */
8261 				queue_reset = true;
8262 				break;
8263 			case 2:
8264 				/* XXX compute */
8265 				queue_reset = true;
8266 				break;
8267 			}
8268 			break;
8269 		case 224: /* SDMA trap event */
8270 			/* XXX check the bitfield order! */
8271 			me_id = (ring_id & 0x3) >> 0;
8272 			queue_id = (ring_id & 0xc) >> 2;
8273 			DRM_DEBUG("IH: SDMA trap\n");
8274 			switch (me_id) {
8275 			case 0:
8276 				switch (queue_id) {
8277 				case 0:
8278 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8279 					break;
8280 				case 1:
8281 					/* XXX compute */
8282 					break;
8283 				case 2:
8284 					/* XXX compute */
8285 					break;
8286 				}
8287 				break;
8288 			case 1:
8289 				switch (queue_id) {
8290 				case 0:
8291 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8292 					break;
8293 				case 1:
8294 					/* XXX compute */
8295 					break;
8296 				case 2:
8297 					/* XXX compute */
8298 					break;
8299 				}
8300 				break;
8301 			}
8302 			break;
8303 		case 230: /* thermal low to high */
8304 			DRM_DEBUG("IH: thermal low to high\n");
8305 			rdev->pm.dpm.thermal.high_to_low = false;
8306 			queue_thermal = true;
8307 			break;
8308 		case 231: /* thermal high to low */
8309 			DRM_DEBUG("IH: thermal high to low\n");
8310 			rdev->pm.dpm.thermal.high_to_low = true;
8311 			queue_thermal = true;
8312 			break;
8313 		case 233: /* GUI IDLE */
8314 			DRM_DEBUG("IH: GUI idle\n");
8315 			break;
8316 		case 241: /* SDMA Privileged inst */
8317 		case 247: /* SDMA Privileged inst */
8318 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8319 			/* XXX check the bitfield order! */
8320 			me_id = (ring_id & 0x3) >> 0;
8321 			queue_id = (ring_id & 0xc) >> 2;
8322 			switch (me_id) {
8323 			case 0:
8324 				switch (queue_id) {
8325 				case 0:
8326 					queue_reset = true;
8327 					break;
8328 				case 1:
8329 					/* XXX compute */
8330 					queue_reset = true;
8331 					break;
8332 				case 2:
8333 					/* XXX compute */
8334 					queue_reset = true;
8335 					break;
8336 				}
8337 				break;
8338 			case 1:
8339 				switch (queue_id) {
8340 				case 0:
8341 					queue_reset = true;
8342 					break;
8343 				case 1:
8344 					/* XXX compute */
8345 					queue_reset = true;
8346 					break;
8347 				case 2:
8348 					/* XXX compute */
8349 					queue_reset = true;
8350 					break;
8351 				}
8352 				break;
8353 			}
8354 			break;
8355 		default:
8356 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8357 			break;
8358 		}
8359 
8360 		/* wptr/rptr are in bytes! */
8361 		rptr += 16;
8362 		rptr &= rdev->ih.ptr_mask;
8363 		WREG32(IH_RB_RPTR, rptr);
8364 	}
8365 	if (queue_dp)
8366 		schedule_work(&rdev->dp_work);
8367 	if (queue_hotplug)
8368 		schedule_work(&rdev->hotplug_work);
8369 	if (queue_reset) {
8370 		rdev->needs_reset = true;
8371 		wake_up_all(&rdev->fence_queue);
8372 	}
8373 	if (queue_thermal)
8374 		schedule_work(&rdev->pm.dpm.thermal.work);
8375 	rdev->ih.rptr = rptr;
8376 	atomic_set(&rdev->ih.lock, 0);
8377 
8378 	/* make sure wptr hasn't changed while processing */
8379 	wptr = cik_get_ih_wptr(rdev);
8380 	if (wptr != rptr)
8381 		goto restart_ih;
8382 
8383 	return IRQ_HANDLED;
8384 }
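
/*
 * Each IH vector on CIK is 128 bits (16 bytes), which is why the read
 * pointer above advances in steps of 16 and wraps through the
 * power-of-two ring via ptr_mask.  A minimal sketch of that step
 * (cik_ih_advance_rptr() is illustrative only and not used by the
 * driver):
 */
static inline u32 cik_ih_advance_rptr(u32 rptr, u32 ptr_mask)
{
	/* with the 64 KiB IH ring set up in cik_init(), ptr_mask is
	 * 0xffff, so rptr 0xfff0 + 16 wraps back to offset 0
	 */
	return (rptr + 16) & ptr_mask;
}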
8385 
8386 /*
8387  * startup/shutdown callbacks
8388  */
8389 /**
8390  * cik_startup - program the asic to a functional state
8391  *
8392  * @rdev: radeon_device pointer
8393  *
8394  * Programs the asic to a functional state (CIK).
8395  * Called by cik_init() and cik_resume().
8396  * Returns 0 for success, error for failure.
8397  */
8398 static int cik_startup(struct radeon_device *rdev)
8399 {
8400 	struct radeon_ring *ring;
8401 	u32 nop;
8402 	int r;
8403 
8404 	/* enable pcie gen2/3 link */
8405 	cik_pcie_gen3_enable(rdev);
8406 	/* enable aspm */
8407 	cik_program_aspm(rdev);
8408 
8409 	/* scratch needs to be initialized before MC */
8410 	r = r600_vram_scratch_init(rdev);
8411 	if (r)
8412 		return r;
8413 
8414 	cik_mc_program(rdev);
8415 
8416 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8417 		r = ci_mc_load_microcode(rdev);
8418 		if (r) {
8419 			DRM_ERROR("Failed to load MC firmware!\n");
8420 			return r;
8421 		}
8422 	}
8423 
8424 	r = cik_pcie_gart_enable(rdev);
8425 	if (r)
8426 		return r;
8427 	cik_gpu_init(rdev);
8428 
8429 	/* allocate rlc buffers */
8430 	if (rdev->flags & RADEON_IS_IGP) {
8431 		if (rdev->family == CHIP_KAVERI) {
8432 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8433 			rdev->rlc.reg_list_size =
8434 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8435 		} else {
8436 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8437 			rdev->rlc.reg_list_size =
8438 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8439 		}
8440 	}
8441 	rdev->rlc.cs_data = ci_cs_data;
8442 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8443 	r = sumo_rlc_init(rdev);
8444 	if (r) {
8445 		DRM_ERROR("Failed to init rlc BOs!\n");
8446 		return r;
8447 	}
8448 
8449 	/* allocate wb buffer */
8450 	r = radeon_wb_init(rdev);
8451 	if (r)
8452 		return r;
8453 
8454 	/* allocate mec buffers */
8455 	r = cik_mec_init(rdev);
8456 	if (r) {
8457 		DRM_ERROR("Failed to init MEC BOs!\n");
8458 		return r;
8459 	}
8460 
8461 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8462 	if (r) {
8463 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8464 		return r;
8465 	}
8466 
8467 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8468 	if (r) {
8469 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8470 		return r;
8471 	}
8472 
8473 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8474 	if (r) {
8475 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8476 		return r;
8477 	}
8478 
8479 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8480 	if (r) {
8481 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8482 		return r;
8483 	}
8484 
8485 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8486 	if (r) {
8487 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8488 		return r;
8489 	}
8490 
8491 	r = radeon_uvd_resume(rdev);
8492 	if (!r) {
8493 		r = uvd_v4_2_resume(rdev);
8494 		if (!r) {
8495 			r = radeon_fence_driver_start_ring(rdev,
8496 							   R600_RING_TYPE_UVD_INDEX);
8497 			if (r)
8498 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8499 		}
8500 	}
8501 	if (r)
8502 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8503 
8504 	r = radeon_vce_resume(rdev);
8505 	if (!r) {
8506 		r = vce_v2_0_resume(rdev);
8507 		if (!r)
8508 			r = radeon_fence_driver_start_ring(rdev,
8509 							   TN_RING_TYPE_VCE1_INDEX);
8510 		if (!r)
8511 			r = radeon_fence_driver_start_ring(rdev,
8512 							   TN_RING_TYPE_VCE2_INDEX);
8513 	}
8514 	if (r) {
8515 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8516 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8517 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8518 	}
8519 
8520 	/* Enable IRQ */
8521 	if (!rdev->irq.installed) {
8522 		r = radeon_irq_kms_init(rdev);
8523 		if (r)
8524 			return r;
8525 	}
8526 
8527 	r = cik_irq_init(rdev);
8528 	if (r) {
8529 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8530 		radeon_irq_kms_fini(rdev);
8531 		return r;
8532 	}
8533 	cik_irq_set(rdev);
8534 
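	/* Older Hawaii microcode does not handle the type-3 NOP correctly,
	 * so those boards fall back to type-2 packets; newer firmware and
	 * all other CIK parts use a type-3 NOP whose 0x3FFF count lets
	 * unused ring space decode as one large packet.
	 */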
8535 	if (rdev->family == CHIP_HAWAII) {
8536 		if (rdev->new_fw)
8537 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8538 		else
8539 			nop = RADEON_CP_PACKET2;
8540 	} else {
8541 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8542 	}
8543 
8544 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8545 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8546 			     nop);
8547 	if (r)
8548 		return r;
8549 
8550 	/* set up the compute queues */
8551 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8552 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8553 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8554 			     nop);
8555 	if (r)
8556 		return r;
8557 	ring->me = 1; /* first MEC */
8558 	ring->pipe = 0; /* first pipe */
8559 	ring->queue = 0; /* first queue */
8560 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8561 
8562 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8563 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8564 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8565 			     nop);
8566 	if (r)
8567 		return r;
8568 	/* dGPUs only have 1 MEC */
8569 	ring->me = 1; /* first MEC */
8570 	ring->pipe = 0; /* first pipe */
8571 	ring->queue = 1; /* second queue */
8572 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8573 
8574 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8575 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8576 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8577 	if (r)
8578 		return r;
8579 
8580 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8581 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8582 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8583 	if (r)
8584 		return r;
8585 
8586 	r = cik_cp_resume(rdev);
8587 	if (r)
8588 		return r;
8589 
8590 	r = cik_sdma_resume(rdev);
8591 	if (r)
8592 		return r;
8593 
8594 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8595 	if (ring->ring_size) {
8596 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8597 				     RADEON_CP_PACKET2);
8598 		if (!r)
8599 			r = uvd_v1_0_init(rdev);
8600 		if (r)
8601 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8602 	}
8603 
8604 	r = -ENOENT;
8605 
8606 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8607 	if (ring->ring_size)
8608 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8609 				     VCE_CMD_NO_OP);
8610 
8611 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8612 	if (ring->ring_size)
8613 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8614 				     VCE_CMD_NO_OP);
8615 
8616 	if (!r)
8617 		r = vce_v1_0_init(rdev);
8618 	else if (r != -ENOENT)
8619 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8620 
8621 	r = radeon_ib_pool_init(rdev);
8622 	if (r) {
8623 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8624 		return r;
8625 	}
8626 
8627 	r = radeon_vm_manager_init(rdev);
8628 	if (r) {
8629 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8630 		return r;
8631 	}
8632 
8633 	r = radeon_audio_init(rdev);
8634 	if (r)
8635 		return r;
8636 
8637 	r = radeon_kfd_resume(rdev);
8638 	if (r)
8639 		return r;
8640 
8641 	return 0;
8642 }
8643 
8644 /**
8645  * cik_resume - resume the asic to a functional state
8646  *
8647  * @rdev: radeon_device pointer
8648  *
8649  * Programs the asic to a functional state (CIK).
8650  * Called at resume.
8651  * Returns 0 for success, error for failure.
8652  */
8653 int cik_resume(struct radeon_device *rdev)
8654 {
8655 	int r;
8656 
8657 	/* post card */
8658 	atom_asic_init(rdev->mode_info.atom_context);
8659 
8660 	/* init golden registers */
8661 	cik_init_golden_registers(rdev);
8662 
8663 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8664 		radeon_pm_resume(rdev);
8665 
8666 	rdev->accel_working = true;
8667 	r = cik_startup(rdev);
8668 	if (r) {
8669 		DRM_ERROR("cik startup failed on resume\n");
8670 		rdev->accel_working = false;
8671 		return r;
8672 	}
8673 
8674 	return r;
8675 
8676 }
8677 
8678 /**
8679  * cik_suspend - suspend the asic
8680  *
8681  * @rdev: radeon_device pointer
8682  *
8683  * Bring the chip into a state suitable for suspend (CIK).
8684  * Called at suspend.
8685  * Returns 0 for success.
8686  */
8687 int cik_suspend(struct radeon_device *rdev)
8688 {
8689 	radeon_kfd_suspend(rdev);
8690 	radeon_pm_suspend(rdev);
8691 	radeon_audio_fini(rdev);
8692 	radeon_vm_manager_fini(rdev);
8693 	cik_cp_enable(rdev, false);
8694 	cik_sdma_enable(rdev, false);
8695 	uvd_v1_0_fini(rdev);
8696 	radeon_uvd_suspend(rdev);
8697 	radeon_vce_suspend(rdev);
8698 	cik_fini_pg(rdev);
8699 	cik_fini_cg(rdev);
8700 	cik_irq_suspend(rdev);
8701 	radeon_wb_disable(rdev);
8702 	cik_pcie_gart_disable(rdev);
8703 	return 0;
8704 }
8705 
8706 /* The plan is to move initialization into this function and use
8707  * helper functions so that radeon_device_init() does little more
8708  * than call the asic-specific functions. This should also
8709  * allow us to remove a bunch of callbacks,
8710  * like vram_info.
8711  */
8712 /**
8713  * cik_init - asic specific driver and hw init
8714  *
8715  * @rdev: radeon_device pointer
8716  *
8717  * Setup asic specific driver variables and program the hw
8718  * to a functional state (CIK).
8719  * Called at driver startup.
8720  * Returns 0 for success, errors for failure.
8721  */
8722 int cik_init(struct radeon_device *rdev)
8723 {
8724 	struct radeon_ring *ring;
8725 	int r;
8726 
8727 	/* Read BIOS */
8728 	if (!radeon_get_bios(rdev)) {
8729 		if (ASIC_IS_AVIVO(rdev))
8730 			return -EINVAL;
8731 	}
8732 	/* Must be an ATOMBIOS */
8733 	if (!rdev->is_atom_bios) {
8734 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8735 		return -EINVAL;
8736 	}
8737 	r = radeon_atombios_init(rdev);
8738 	if (r)
8739 		return r;
8740 
8741 	/* Post card if necessary */
8742 	if (!radeon_card_posted(rdev)) {
8743 		if (!rdev->bios) {
8744 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8745 			return -EINVAL;
8746 		}
8747 		DRM_INFO("GPU not posted. posting now...\n");
8748 		atom_asic_init(rdev->mode_info.atom_context);
8749 	}
8750 	/* init golden registers */
8751 	cik_init_golden_registers(rdev);
8752 	/* Initialize scratch registers */
8753 	cik_scratch_init(rdev);
8754 	/* Initialize surface registers */
8755 	radeon_surface_init(rdev);
8756 	/* Initialize clocks */
8757 	radeon_get_clock_info(rdev->ddev);
8758 
8759 	/* Fence driver */
8760 	r = radeon_fence_driver_init(rdev);
8761 	if (r)
8762 		return r;
8763 
8764 	/* initialize memory controller */
8765 	r = cik_mc_init(rdev);
8766 	if (r)
8767 		return r;
8768 	/* Memory manager */
8769 	r = radeon_bo_init(rdev);
8770 	if (r)
8771 		return r;
8772 
8773 	if (rdev->flags & RADEON_IS_IGP) {
8774 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8775 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8776 			r = cik_init_microcode(rdev);
8777 			if (r) {
8778 				DRM_ERROR("Failed to load firmware!\n");
8779 				return r;
8780 			}
8781 		}
8782 	} else {
8783 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8784 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8785 		    !rdev->mc_fw) {
8786 			r = cik_init_microcode(rdev);
8787 			if (r) {
8788 				DRM_ERROR("Failed to load firmware!\n");
8789 				return r;
8790 			}
8791 		}
8792 	}
8793 
8794 	/* Initialize power management */
8795 	radeon_pm_init(rdev);
8796 
8797 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8798 	ring->ring_obj = NULL;
8799 	r600_ring_init(rdev, ring, 1024 * 1024);
8800 
8801 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8802 	ring->ring_obj = NULL;
8803 	r600_ring_init(rdev, ring, 1024 * 1024);
8804 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8805 	if (r)
8806 		return r;
8807 
8808 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8809 	ring->ring_obj = NULL;
8810 	r600_ring_init(rdev, ring, 1024 * 1024);
8811 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8812 	if (r)
8813 		return r;
8814 
8815 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8816 	ring->ring_obj = NULL;
8817 	r600_ring_init(rdev, ring, 256 * 1024);
8818 
8819 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8820 	ring->ring_obj = NULL;
8821 	r600_ring_init(rdev, ring, 256 * 1024);
8822 
8823 	r = radeon_uvd_init(rdev);
8824 	if (!r) {
8825 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8826 		ring->ring_obj = NULL;
8827 		r600_ring_init(rdev, ring, 4096);
8828 	}
8829 
8830 	r = radeon_vce_init(rdev);
8831 	if (!r) {
8832 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8833 		ring->ring_obj = NULL;
8834 		r600_ring_init(rdev, ring, 4096);
8835 
8836 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8837 		ring->ring_obj = NULL;
8838 		r600_ring_init(rdev, ring, 4096);
8839 	}
8840 
8841 	rdev->ih.ring_obj = NULL;
8842 	r600_ih_ring_init(rdev, 64 * 1024);
8843 
8844 	r = r600_pcie_gart_init(rdev);
8845 	if (r)
8846 		return r;
8847 
8848 	rdev->accel_working = true;
8849 	r = cik_startup(rdev);
8850 	if (r) {
8851 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8852 		cik_cp_fini(rdev);
8853 		cik_sdma_fini(rdev);
8854 		cik_irq_fini(rdev);
8855 		sumo_rlc_fini(rdev);
8856 		cik_mec_fini(rdev);
8857 		radeon_wb_fini(rdev);
8858 		radeon_ib_pool_fini(rdev);
8859 		radeon_vm_manager_fini(rdev);
8860 		radeon_irq_kms_fini(rdev);
8861 		cik_pcie_gart_fini(rdev);
8862 		rdev->accel_working = false;
8863 	}
8864 
8865 	/* Don't start up if the MC ucode is missing.
8866 	 * The default clocks and voltages before the MC ucode
8867 	 * is loaded are not sufficient for advanced operations.
8868 	 */
8869 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8870 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8871 		return -EINVAL;
8872 	}
8873 
8874 	return 0;
8875 }
8876 
8877 /**
8878  * cik_fini - asic specific driver and hw fini
8879  *
8880  * @rdev: radeon_device pointer
8881  *
8882  * Tear down the asic specific driver variables and program the hw
8883  * to an idle state (CIK).
8884  * Called at driver unload.
8885  */
8886 void cik_fini(struct radeon_device *rdev)
8887 {
8888 	radeon_pm_fini(rdev);
8889 	cik_cp_fini(rdev);
8890 	cik_sdma_fini(rdev);
8891 	cik_fini_pg(rdev);
8892 	cik_fini_cg(rdev);
8893 	cik_irq_fini(rdev);
8894 	sumo_rlc_fini(rdev);
8895 	cik_mec_fini(rdev);
8896 	radeon_wb_fini(rdev);
8897 	radeon_vm_manager_fini(rdev);
8898 	radeon_ib_pool_fini(rdev);
8899 	radeon_irq_kms_fini(rdev);
8900 	uvd_v1_0_fini(rdev);
8901 	radeon_uvd_fini(rdev);
8902 	radeon_vce_fini(rdev);
8903 	cik_pcie_gart_fini(rdev);
8904 	r600_vram_scratch_fini(rdev);
8905 	radeon_gem_fini(rdev);
8906 	radeon_fence_driver_fini(rdev);
8907 	radeon_bo_fini(rdev);
8908 	radeon_atombios_fini(rdev);
8909 	kfree(rdev->bios);
8910 	rdev->bios = NULL;
8911 }
8912 
8913 void dce8_program_fmt(struct drm_encoder *encoder)
8914 {
8915 	struct drm_device *dev = encoder->dev;
8916 	struct radeon_device *rdev = dev->dev_private;
8917 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8918 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8919 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8920 	int bpc = 0;
8921 	u32 tmp = 0;
8922 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8923 
8924 	if (connector) {
8925 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8926 		bpc = radeon_get_monitor_bpc(connector);
8927 		dither = radeon_connector->dither;
8928 	}
8929 
8930 	/* LVDS/eDP FMT is set up by atom */
8931 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8932 		return;
8933 
8934 	/* not needed for analog */
8935 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8936 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8937 		return;
8938 
8939 	if (bpc == 0)
8940 		return;
8941 
8942 	switch (bpc) {
8943 	case 6:
8944 		if (dither == RADEON_FMT_DITHER_ENABLE)
8945 			/* XXX sort out optimal dither settings */
8946 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8947 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8948 		else
8949 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8950 		break;
8951 	case 8:
8952 		if (dither == RADEON_FMT_DITHER_ENABLE)
8953 			/* XXX sort out optimal dither settings */
8954 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8955 				FMT_RGB_RANDOM_ENABLE |
8956 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8957 		else
8958 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8959 		break;
8960 	case 10:
8961 		if (dither == RADEON_FMT_DITHER_ENABLE)
8962 			/* XXX sort out optimal dither settings */
8963 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8964 				FMT_RGB_RANDOM_ENABLE |
8965 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8966 		else
8967 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8968 		break;
8969 	default:
8970 		/* not needed */
8971 		break;
8972 	}
8973 
8974 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8975 }
8976 
8977 /* display watermark setup */
8978 /**
8979  * dce8_line_buffer_adjust - Set up the line buffer
8980  *
8981  * @rdev: radeon_device pointer
8982  * @radeon_crtc: the selected display controller
8983  * @mode: the current display mode on the selected display
8984  * controller
8985  *
8986  * Set up the line buffer allocation for
8987  * the selected display controller (CIK).
8988  * Returns the line buffer size in pixels.
8989  */
8990 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8991 				   struct radeon_crtc *radeon_crtc,
8992 				   struct drm_display_mode *mode)
8993 {
8994 	u32 tmp, buffer_alloc, i;
8995 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8996 	/*
8997 	 * Line Buffer Setup
8998 	 * There are 6 line buffers, one for each display controller.
8999 	 * There are 3 partitions per LB. Select the number of partitions
9000 	 * to enable based on the display width.  For display widths larger
9001 	 * than 4096, you need to use 2 display controllers and combine
9002 	 * them using the stereo blender.
9003 	 */
9004 	if (radeon_crtc->base.enabled && mode) {
9005 		if (mode->crtc_hdisplay < 1920) {
9006 			tmp = 1;
9007 			buffer_alloc = 2;
9008 		} else if (mode->crtc_hdisplay < 2560) {
9009 			tmp = 2;
9010 			buffer_alloc = 2;
9011 		} else if (mode->crtc_hdisplay < 4096) {
9012 			tmp = 0;
9013 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9014 		} else {
9015 			DRM_DEBUG_KMS("Mode too big for LB!\n");
9016 			tmp = 0;
9017 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9018 		}
9019 	} else {
9020 		tmp = 1;
9021 		buffer_alloc = 0;
9022 	}
9023 
9024 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9025 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9026 
9027 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9028 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9029 	for (i = 0; i < rdev->usec_timeout; i++) {
9030 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9031 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9032 			break;
9033 		udelay(1);
9034 	}
9035 
9036 	if (radeon_crtc->base.enabled && mode) {
9037 		switch (tmp) {
9038 		case 0:
9039 		default:
9040 			return 4096 * 2;
9041 		case 1:
9042 			return 1920 * 2;
9043 		case 2:
9044 			return 2560 * 2;
9045 		}
9046 	}
9047 
9048 	/* controller not enabled, so no lb used */
9049 	return 0;
9050 }
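
/*
 * Worked example (values illustrative): a 1920x1080 mode has
 * crtc_hdisplay == 1920, which is not < 1920 but is < 2560, so
 * dce8_line_buffer_adjust() picks LB config 2 with 2 DMIF buffers
 * and reports a line buffer of 2560 * 2 = 5120 pixels to the
 * watermark code below.
 */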
9051 
9052 /**
9053  * cik_get_number_of_dram_channels - get the number of dram channels
9054  *
9055  * @rdev: radeon_device pointer
9056  *
9057  * Look up the number of video ram channels (CIK).
9058  * Used for display watermark bandwidth calculations
9059  * Returns the number of dram channels
9060  */
9061 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9062 {
9063 	u32 tmp = RREG32(MC_SHARED_CHMAP);
9064 
9065 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9066 	case 0:
9067 	default:
9068 		return 1;
9069 	case 1:
9070 		return 2;
9071 	case 2:
9072 		return 4;
9073 	case 3:
9074 		return 8;
9075 	case 4:
9076 		return 3;
9077 	case 5:
9078 		return 6;
9079 	case 6:
9080 		return 10;
9081 	case 7:
9082 		return 12;
9083 	case 8:
9084 		return 16;
9085 	}
9086 }
9087 
9088 struct dce8_wm_params {
9089 	u32 dram_channels; /* number of dram channels */
9090 	u32 yclk;          /* bandwidth per dram data pin in kHz */
9091 	u32 sclk;          /* engine clock in kHz */
9092 	u32 disp_clk;      /* display clock in kHz */
9093 	u32 src_width;     /* viewport width */
9094 	u32 active_time;   /* active display time in ns */
9095 	u32 blank_time;    /* blank time in ns */
9096 	bool interlaced;    /* mode is interlaced */
9097 	fixed20_12 vsc;    /* vertical scale ratio */
9098 	u32 num_heads;     /* number of active crtcs */
9099 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9100 	u32 lb_size;       /* line buffer allocated to pipe */
9101 	u32 vtaps;         /* vertical scaler taps */
9102 };
9103 
9104 /**
9105  * dce8_dram_bandwidth - get the dram bandwidth
9106  *
9107  * @wm: watermark calculation data
9108  *
9109  * Calculate the raw dram bandwidth (CIK).
9110  * Used for display watermark bandwidth calculations
9111  * Returns the dram bandwidth in MBytes/s
9112  */
9113 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9114 {
9115 	/* Calculate raw DRAM Bandwidth */
9116 	fixed20_12 dram_efficiency; /* 0.7 */
9117 	fixed20_12 yclk, dram_channels, bandwidth;
9118 	fixed20_12 a;
9119 
9120 	a.full = dfixed_const(1000);
9121 	yclk.full = dfixed_const(wm->yclk);
9122 	yclk.full = dfixed_div(yclk, a);
9123 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9124 	a.full = dfixed_const(10);
9125 	dram_efficiency.full = dfixed_const(7);
9126 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9127 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9128 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9129 
9130 	return dfixed_trunc(bandwidth);
9131 }
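
/*
 * Worked example (values illustrative): with 4 DRAM channels and
 * wm->yclk = 500000 (a 500 MHz effective memory clock in kHz),
 * dce8_dram_bandwidth() computes
 *	(4 channels * 4 bytes) * (500000 / 1000) * 0.7 = 5600 MBytes/s
 * using 20.12 fixed-point arithmetic throughout.
 */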
9132 
9133 /**
9134  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9135  *
9136  * @wm: watermark calculation data
9137  *
9138  * Calculate the dram bandwidth used for display (CIK).
9139  * Used for display watermark bandwidth calculations
9140  * Returns the dram bandwidth for display in MBytes/s
9141  */
9142 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9143 {
9144 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9145 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9146 	fixed20_12 yclk, dram_channels, bandwidth;
9147 	fixed20_12 a;
9148 
9149 	a.full = dfixed_const(1000);
9150 	yclk.full = dfixed_const(wm->yclk);
9151 	yclk.full = dfixed_div(yclk, a);
9152 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9153 	a.full = dfixed_const(10);
9154 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9155 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9156 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9157 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9158 
9159 	return dfixed_trunc(bandwidth);
9160 }
9161 
9162 /**
9163  * dce8_data_return_bandwidth - get the data return bandwidth
9164  *
9165  * @wm: watermark calculation data
9166  *
9167  * Calculate the data return bandwidth used for display (CIK).
9168  * Used for display watermark bandwidth calculations
9169  * Returns the data return bandwidth in MBytes/s
9170  */
9171 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9172 {
9173 	/* Calculate the display Data return Bandwidth */
9174 	fixed20_12 return_efficiency; /* 0.8 */
9175 	fixed20_12 sclk, bandwidth;
9176 	fixed20_12 a;
9177 
9178 	a.full = dfixed_const(1000);
9179 	sclk.full = dfixed_const(wm->sclk);
9180 	sclk.full = dfixed_div(sclk, a);
9181 	a.full = dfixed_const(10);
9182 	return_efficiency.full = dfixed_const(8);
9183 	return_efficiency.full = dfixed_div(return_efficiency, a);
9184 	a.full = dfixed_const(32);
9185 	bandwidth.full = dfixed_mul(a, sclk);
9186 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9187 
9188 	return dfixed_trunc(bandwidth);
9189 }
9190 
9191 /**
9192  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9193  *
9194  * @wm: watermark calculation data
9195  *
9196  * Calculate the dmif bandwidth used for display (CIK).
9197  * Used for display watermark bandwidth calculations
9198  * Returns the dmif bandwidth in MBytes/s
9199  */
9200 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9201 {
9202 	/* Calculate the DMIF Request Bandwidth */
9203 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9204 	fixed20_12 disp_clk, bandwidth;
9205 	fixed20_12 a, b;
9206 
9207 	a.full = dfixed_const(1000);
9208 	disp_clk.full = dfixed_const(wm->disp_clk);
9209 	disp_clk.full = dfixed_div(disp_clk, a);
9210 	a.full = dfixed_const(32);
9211 	b.full = dfixed_mul(a, disp_clk);
9212 
9213 	a.full = dfixed_const(10);
9214 	disp_clk_request_efficiency.full = dfixed_const(8);
9215 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9216 
9217 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9218 
9219 	return dfixed_trunc(bandwidth);
9220 }
9221 
9222 /**
9223  * dce8_available_bandwidth - get the min available bandwidth
9224  *
9225  * @wm: watermark calculation data
9226  *
9227  * Calculate the min available bandwidth used for display (CIK).
9228  * Used for display watermark bandwidth calculations
9229  * Returns the min available bandwidth in MBytes/s
9230  */
9231 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9232 {
9233 	/* Calculate the available bandwidth. The display can use this temporarily but not on average. */
9234 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9235 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9236 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9237 
9238 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9239 }
9240 
9241 /**
9242  * dce8_average_bandwidth - get the average available bandwidth
9243  *
9244  * @wm: watermark calculation data
9245  *
9246  * Calculate the average available bandwidth used for display (CIK).
9247  * Used for display watermark bandwidth calculations
9248  * Returns the average available bandwidth in MBytes/s
9249  */
9250 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9251 {
9252 	/* Calculate the display mode Average Bandwidth
9253 	 * DisplayMode should contain the source and destination dimensions,
9254 	 * timing, etc.
9255 	 */
9256 	fixed20_12 bpp;
9257 	fixed20_12 line_time;
9258 	fixed20_12 src_width;
9259 	fixed20_12 bandwidth;
9260 	fixed20_12 a;
9261 
9262 	a.full = dfixed_const(1000);
9263 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9264 	line_time.full = dfixed_div(line_time, a);
9265 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9266 	src_width.full = dfixed_const(wm->src_width);
9267 	bandwidth.full = dfixed_mul(src_width, bpp);
9268 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9269 	bandwidth.full = dfixed_div(bandwidth, line_time);
9270 
9271 	return dfixed_trunc(bandwidth);
9272 }
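
/*
 * Worked example (values illustrative): a 1920-wide 32bpp surface
 * with vsc = 1 and a 13.2 us line time (active + blank) averages
 *	1920 * 4 * 1 / 13.2 ~= 582 MBytes/s
 * which dce8_average_bandwidth() returns truncated to an integer.
 */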
9273 
9274 /**
9275  * dce8_latency_watermark - get the latency watermark
9276  *
9277  * @wm: watermark calculation data
9278  *
9279  * Calculate the latency watermark (CIK).
9280  * Used for display watermark bandwidth calculations
9281  * Returns the latency watermark in ns
9282  */
9283 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9284 {
9285 	/* First calculate the latency in ns */
9286 	u32 mc_latency = 2000; /* 2000 ns. */
9287 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9288 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9289 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9290 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9291 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9292 		(wm->num_heads * cursor_line_pair_return_time);
9293 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9294 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9295 	u32 tmp, dmif_size = 12288;
9296 	fixed20_12 a, b, c;
9297 
9298 	if (wm->num_heads == 0)
9299 		return 0;
9300 
9301 	a.full = dfixed_const(2);
9302 	b.full = dfixed_const(1);
9303 	if ((wm->vsc.full > a.full) ||
9304 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9305 	    (wm->vtaps >= 5) ||
9306 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9307 		max_src_lines_per_dst_line = 4;
9308 	else
9309 		max_src_lines_per_dst_line = 2;
9310 
9311 	a.full = dfixed_const(available_bandwidth);
9312 	b.full = dfixed_const(wm->num_heads);
9313 	a.full = dfixed_div(a, b);
9314 
9315 	b.full = dfixed_const(mc_latency + 512);
9316 	c.full = dfixed_const(wm->disp_clk);
9317 	b.full = dfixed_div(b, c);
9318 
9319 	c.full = dfixed_const(dmif_size);
9320 	b.full = dfixed_div(c, b);
9321 
9322 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9323 
9324 	b.full = dfixed_const(1000);
9325 	c.full = dfixed_const(wm->disp_clk);
9326 	b.full = dfixed_div(c, b);
9327 	c.full = dfixed_const(wm->bytes_per_pixel);
9328 	b.full = dfixed_mul(b, c);
9329 
9330 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9331 
9332 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9333 	b.full = dfixed_const(1000);
9334 	c.full = dfixed_const(lb_fill_bw);
9335 	b.full = dfixed_div(c, b);
9336 	a.full = dfixed_div(a, b);
9337 	line_fill_time = dfixed_trunc(a);
9338 
9339 	if (line_fill_time < wm->active_time)
9340 		return latency;
9341 	else
9342 		return latency + (line_fill_time - wm->active_time);
9343 
9344 }
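
/*
 * In short: the watermark is the estimated worst-case latency of a
 * request (MC latency + time for the other heads' chunk and cursor
 * returns + DC pipe latency), plus a penalty whenever the line
 * buffer cannot be refilled within one active line.
 */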
9345 
9346 /**
9347  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9348  * average and available dram bandwidth
9349  *
9350  * @wm: watermark calculation data
9351  *
9352  * Check if the display average bandwidth fits in the display
9353  * dram bandwidth (CIK).
9354  * Used for display watermark bandwidth calculations
9355  * Returns true if the display fits, false if not.
9356  */
9357 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9358 {
9359 	if (dce8_average_bandwidth(wm) <=
9360 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9361 		return true;
9362 	else
9363 		return false;
9364 }
9365 
9366 /**
9367  * dce8_average_bandwidth_vs_available_bandwidth - check
9368  * average and available bandwidth
9369  *
9370  * @wm: watermark calculation data
9371  *
9372  * Check if the display average bandwidth fits in the display
9373  * available bandwidth (CIK).
9374  * Used for display watermark bandwidth calculations
9375  * Returns true if the display fits, false if not.
9376  */
9377 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9378 {
9379 	if (dce8_average_bandwidth(wm) <=
9380 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9381 		return true;
9382 	else
9383 		return false;
9384 }
9385 
9386 /**
9387  * dce8_check_latency_hiding - check latency hiding
9388  *
9389  * @wm: watermark calculation data
9390  *
9391  * Check latency hiding (CIK).
9392  * Used for display watermark bandwidth calculations
9393  * Returns true if the display fits, false if not.
9394  */
9395 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9396 {
9397 	u32 lb_partitions = wm->lb_size / wm->src_width;
9398 	u32 line_time = wm->active_time + wm->blank_time;
9399 	u32 latency_tolerant_lines;
9400 	u32 latency_hiding;
9401 	fixed20_12 a;
9402 
9403 	a.full = dfixed_const(1);
9404 	if (wm->vsc.full > a.full)
9405 		latency_tolerant_lines = 1;
9406 	else {
9407 		if (lb_partitions <= (wm->vtaps + 1))
9408 			latency_tolerant_lines = 1;
9409 		else
9410 			latency_tolerant_lines = 2;
9411 	}
9412 
9413 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9414 
9415 	if (dce8_latency_watermark(wm) <= latency_hiding)
9416 		return true;
9417 	else
9418 		return false;
9419 }
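
/*
 * Worked example (values illustrative): a 1920-wide source with a
 * 5120-pixel line buffer gives lb_partitions = 2; with vtaps = 1
 * that is <= vtaps + 1, so only one latency tolerant line is
 * assumed and latency_hiding is one line time plus the blank time.
 */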
9420 
9421 /**
9422  * dce8_program_watermarks - program display watermarks
9423  *
9424  * @rdev: radeon_device pointer
9425  * @radeon_crtc: the selected display controller
9426  * @lb_size: line buffer size
9427  * @num_heads: number of display controllers in use
9428  *
9429  * Calculate and program the display watermarks for the
9430  * selected display controller (CIK).
9431  */
9432 static void dce8_program_watermarks(struct radeon_device *rdev,
9433 				    struct radeon_crtc *radeon_crtc,
9434 				    u32 lb_size, u32 num_heads)
9435 {
9436 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9437 	struct dce8_wm_params wm_low, wm_high;
9438 	u32 pixel_period;
9439 	u32 line_time = 0;
9440 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9441 	u32 tmp, wm_mask;
9442 
9443 	if (radeon_crtc->base.enabled && num_heads && mode) {
9444 		pixel_period = 1000000 / (u32)mode->clock;
9445 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9446 
9447 		/* watermark for high clocks */
9448 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9449 		    rdev->pm.dpm_enabled) {
9450 			wm_high.yclk =
9451 				radeon_dpm_get_mclk(rdev, false) * 10;
9452 			wm_high.sclk =
9453 				radeon_dpm_get_sclk(rdev, false) * 10;
9454 		} else {
9455 			wm_high.yclk = rdev->pm.current_mclk * 10;
9456 			wm_high.sclk = rdev->pm.current_sclk * 10;
9457 		}
9458 
9459 		wm_high.disp_clk = mode->clock;
9460 		wm_high.src_width = mode->crtc_hdisplay;
9461 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9462 		wm_high.blank_time = line_time - wm_high.active_time;
9463 		wm_high.interlaced = false;
9464 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9465 			wm_high.interlaced = true;
9466 		wm_high.vsc = radeon_crtc->vsc;
9467 		wm_high.vtaps = 1;
9468 		if (radeon_crtc->rmx_type != RMX_OFF)
9469 			wm_high.vtaps = 2;
9470 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9471 		wm_high.lb_size = lb_size;
9472 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9473 		wm_high.num_heads = num_heads;
9474 
9475 		/* set for high clocks */
9476 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9477 
9478 		/* possibly force display priority to high */
9479 		/* should really do this at mode validation time... */
9480 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9481 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9482 		    !dce8_check_latency_hiding(&wm_high) ||
9483 		    (rdev->disp_priority == 2)) {
9484 			DRM_DEBUG_KMS("force priority to high\n");
9485 		}
9486 
9487 		/* watermark for low clocks */
9488 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9489 		    rdev->pm.dpm_enabled) {
9490 			wm_low.yclk =
9491 				radeon_dpm_get_mclk(rdev, true) * 10;
9492 			wm_low.sclk =
9493 				radeon_dpm_get_sclk(rdev, true) * 10;
9494 		} else {
9495 			wm_low.yclk = rdev->pm.current_mclk * 10;
9496 			wm_low.sclk = rdev->pm.current_sclk * 10;
9497 		}
9498 
9499 		wm_low.disp_clk = mode->clock;
9500 		wm_low.src_width = mode->crtc_hdisplay;
9501 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9502 		wm_low.blank_time = line_time - wm_low.active_time;
9503 		wm_low.interlaced = false;
9504 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9505 			wm_low.interlaced = true;
9506 		wm_low.vsc = radeon_crtc->vsc;
9507 		wm_low.vtaps = 1;
9508 		if (radeon_crtc->rmx_type != RMX_OFF)
9509 			wm_low.vtaps = 2;
9510 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9511 		wm_low.lb_size = lb_size;
9512 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9513 		wm_low.num_heads = num_heads;
9514 
9515 		/* set for low clocks */
9516 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9517 
9518 		/* possibly force display priority to high */
9519 		/* should really do this at mode validation time... */
9520 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9521 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9522 		    !dce8_check_latency_hiding(&wm_low) ||
9523 		    (rdev->disp_priority == 2)) {
9524 			DRM_DEBUG_KMS("force priority to high\n");
9525 		}
9526 	}
9527 
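	/* DPG_WATERMARK_MASK_CONTROL selects which watermark set the
	 * following DPG_PIPE_LATENCY_CONTROL write lands in: program
	 * set A with the high-clock value and set B with the low-clock
	 * value, then restore the original selection so DPM can switch
	 * between them.
	 */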
9528 	/* select wm A */
9529 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9530 	tmp = wm_mask;
9531 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9532 	tmp |= LATENCY_WATERMARK_MASK(1);
9533 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9534 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9535 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9536 		LATENCY_HIGH_WATERMARK(line_time)));
9537 	/* select wm B */
9538 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9539 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9540 	tmp |= LATENCY_WATERMARK_MASK(2);
9541 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9542 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9543 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9544 		LATENCY_HIGH_WATERMARK(line_time)));
9545 	/* restore original selection */
9546 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9547 
9548 	/* save values for DPM */
9549 	radeon_crtc->line_time = line_time;
9550 	radeon_crtc->wm_high = latency_watermark_a;
9551 	radeon_crtc->wm_low = latency_watermark_b;
9552 }
9553 
9554 /**
9555  * dce8_bandwidth_update - program display watermarks
9556  *
9557  * @rdev: radeon_device pointer
9558  *
9559  * Calculate and program the display watermarks and line
9560  * buffer allocation (CIK).
9561  */
9562 void dce8_bandwidth_update(struct radeon_device *rdev)
9563 {
9564 	struct drm_display_mode *mode = NULL;
9565 	u32 num_heads = 0, lb_size;
9566 	int i;
9567 
9568 	if (!rdev->mode_info.mode_config_initialized)
9569 		return;
9570 
9571 	radeon_update_display_priority(rdev);
9572 
9573 	for (i = 0; i < rdev->num_crtc; i++) {
9574 		if (rdev->mode_info.crtcs[i]->base.enabled)
9575 			num_heads++;
9576 	}
9577 	for (i = 0; i < rdev->num_crtc; i++) {
9578 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9579 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9580 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9581 	}
9582 }
9583 
9584 /**
9585  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9586  *
9587  * @rdev: radeon_device pointer
9588  *
9589  * Fetches a GPU clock counter snapshot (CIK).
9590  * Returns the 64 bit clock counter snapshot.
9591  */
9592 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9593 {
9594 	uint64_t clock;
9595 
9596 	mutex_lock(&rdev->gpu_clock_mutex);
9597 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9598 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9599 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9600 	mutex_unlock(&rdev->gpu_clock_mutex);
9601 	return clock;
9602 }
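
/*
 * A minimal usage sketch (illustrative): writing the capture register
 * latches both 32-bit halves, so the LSB/MSB reads are coherent and
 * two calls can measure elapsed GPU clocks without tearing:
 *
 *	uint64_t t0 = cik_get_gpu_clock_counter(rdev);
 *	...
 *	uint64_t ticks = cik_get_gpu_clock_counter(rdev) - t0;
 */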
9603 
9604 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9605                               u32 cntl_reg, u32 status_reg)
9606 {
9607 	int r, i;
9608 	struct atom_clock_dividers dividers;
9609 	uint32_t tmp;
9610 
9611 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9612 					   clock, false, &dividers);
9613 	if (r)
9614 		return r;
9615 
9616 	tmp = RREG32_SMC(cntl_reg);
9617 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9618 	tmp |= dividers.post_divider;
9619 	WREG32_SMC(cntl_reg, tmp);
9620 
9621 	for (i = 0; i < 100; i++) {
9622 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9623 			break;
9624 		mdelay(10);
9625 	}
9626 	if (i == 100)
9627 		return -ETIMEDOUT;
9628 
9629 	return 0;
9630 }
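
/*
 * Note on the polling above: 100 iterations of mdelay(10) bound the
 * wait for the divider status bit at roughly one second before giving
 * up with -ETIMEDOUT; cik_set_vce_clocks() below uses the same
 * pattern for the ECLK divider.
 */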
9631 
9632 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9633 {
9634 	int r = 0;
9635 
9636 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9637 	if (r)
9638 		return r;
9639 
9640 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9641 	return r;
9642 }
9643 
9644 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9645 {
9646 	int r, i;
9647 	struct atom_clock_dividers dividers;
9648 	u32 tmp;
9649 
9650 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9651 					   ecclk, false, &dividers);
9652 	if (r)
9653 		return r;
9654 
9655 	for (i = 0; i < 100; i++) {
9656 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9657 			break;
9658 		mdelay(10);
9659 	}
9660 	if (i == 100)
9661 		return -ETIMEDOUT;
9662 
9663 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9664 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9665 	tmp |= dividers.post_divider;
9666 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9667 
9668 	for (i = 0; i < 100; i++) {
9669 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9670 			break;
9671 		mdelay(10);
9672 	}
9673 	if (i == 100)
9674 		return -ETIMEDOUT;
9675 
9676 	return 0;
9677 }
9678 
9679 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9680 {
9681 	struct pci_dev *root = rdev->pdev->bus->self;
9682 	int bridge_pos, gpu_pos;
9683 	u32 speed_cntl, mask, current_data_rate;
9684 	int ret, i;
9685 	u16 tmp16;
9686 
9687 	if (pci_is_root_bus(rdev->pdev->bus))
9688 		return;
9689 
9690 	if (radeon_pcie_gen2 == 0)
9691 		return;
9692 
9693 	if (rdev->flags & RADEON_IS_IGP)
9694 		return;
9695 
9696 	if (!(rdev->flags & RADEON_IS_PCIE))
9697 		return;
9698 
9699 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9700 	if (ret != 0)
9701 		return;
9702 
9703 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9704 		return;
9705 
9706 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9707 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9708 		LC_CURRENT_DATA_RATE_SHIFT;
9709 	if (mask & DRM_PCIE_SPEED_80) {
9710 		if (current_data_rate == 2) {
9711 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9712 			return;
9713 		}
9714 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9715 	} else if (mask & DRM_PCIE_SPEED_50) {
9716 		if (current_data_rate == 1) {
9717 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9718 			return;
9719 		}
9720 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9721 	}
9722 
9723 	bridge_pos = pci_pcie_cap(root);
9724 	if (!bridge_pos)
9725 		return;
9726 
9727 	gpu_pos = pci_pcie_cap(rdev->pdev);
9728 	if (!gpu_pos)
9729 		return;
9730 
9731 	if (mask & DRM_PCIE_SPEED_80) {
9732 		/* re-try equalization if gen3 is not already enabled */
9733 		if (current_data_rate != 2) {
9734 			u16 bridge_cfg, gpu_cfg;
9735 			u16 bridge_cfg2, gpu_cfg2;
9736 			u32 max_lw, current_lw, tmp;
9737 
9738 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9739 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9740 
9741 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9742 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9743 
9744 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9745 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9746 
9747 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9748 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9749 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9750 
9751 			if (current_lw < max_lw) {
9752 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9753 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9754 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9755 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9756 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9757 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9758 				}
9759 			}
9760 
9761 			for (i = 0; i < 10; i++) {
9762 				/* check status */
9763 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9764 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9765 					break;
9766 
9767 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9768 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9769 
9770 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9771 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9772 
9773 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9774 				tmp |= LC_SET_QUIESCE;
9775 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9776 
9777 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9778 				tmp |= LC_REDO_EQ;
9779 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9780 
9781 				mdelay(100);
9782 
9783 				/* linkctl */
9784 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9785 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9786 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9787 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9788 
9789 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9790 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9791 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9792 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9793 
9794 				/* linkctl2 */
9795 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9796 				tmp16 &= ~((1 << 4) | (7 << 9));
9797 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9798 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9799 
9800 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9801 				tmp16 &= ~((1 << 4) | (7 << 9));
9802 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9803 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9804 
9805 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9806 				tmp &= ~LC_SET_QUIESCE;
9807 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9808 			}
9809 		}
9810 	}
9811 
9812 	/* set the link speed */
9813 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9814 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9815 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9816 
9817 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9818 	tmp16 &= ~0xf;
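	/* the low bits of PCI_EXP_LNKCTL2 hold the target link speed:
	 * 1, 2 and 3 select 2.5, 5.0 and 8.0 GT/s respectively
	 */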
9819 	if (mask & DRM_PCIE_SPEED_80)
9820 		tmp16 |= 3; /* gen3 */
9821 	else if (mask & DRM_PCIE_SPEED_50)
9822 		tmp16 |= 2; /* gen2 */
9823 	else
9824 		tmp16 |= 1; /* gen1 */
9825 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9826 
9827 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9828 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9829 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9830 
9831 	for (i = 0; i < rdev->usec_timeout; i++) {
9832 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9833 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9834 			break;
9835 		udelay(1);
9836 	}
9837 }
9838 
9839 static void cik_program_aspm(struct radeon_device *rdev)
9840 {
9841 	u32 data, orig;
9842 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9843 	bool disable_clkreq = false;
9844 
9845 	if (radeon_aspm == 0)
9846 		return;
9847 
9848 	/* XXX double check IGPs */
9849 	if (rdev->flags & RADEON_IS_IGP)
9850 		return;
9851 
9852 	if (!(rdev->flags & RADEON_IS_PCIE))
9853 		return;
9854 
9855 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9856 	data &= ~LC_XMIT_N_FTS_MASK;
9857 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9858 	if (orig != data)
9859 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9860 
9861 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9862 	data |= LC_GO_TO_RECOVERY;
9863 	if (orig != data)
9864 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9865 
9866 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9867 	data |= P_IGNORE_EDB_ERR;
9868 	if (orig != data)
9869 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9870 
9871 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9872 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9873 	data |= LC_PMI_TO_L1_DIS;
9874 	if (!disable_l0s)
9875 		data |= LC_L0S_INACTIVITY(7);
9876 
9877 	if (!disable_l1) {
9878 		data |= LC_L1_INACTIVITY(7);
9879 		data &= ~LC_PMI_TO_L1_DIS;
9880 		if (orig != data)
9881 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9882 
9883 		if (!disable_plloff_in_l1) {
9884 			bool clk_req_support;
9885 
9886 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9887 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9888 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9889 			if (orig != data)
9890 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9891 
9892 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9893 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9894 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9895 			if (orig != data)
9896 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9897 
9898 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9899 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9900 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9901 			if (orig != data)
9902 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9903 
9904 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9905 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9906 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9907 			if (orig != data)
9908 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9909 
9910 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9911 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9912 			data |= LC_DYN_LANES_PWR_STATE(3);
9913 			if (orig != data)
9914 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9915 
9916 			if (!disable_clkreq &&
9917 			    !pci_is_root_bus(rdev->pdev->bus)) {
9918 				struct pci_dev *root = rdev->pdev->bus->self;
9919 				u32 lnkcap;
9920 
9921 				clk_req_support = false;
9922 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9923 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9924 					clk_req_support = true;
9925 			} else {
9926 				clk_req_support = false;
9927 			}
9928 
9929 			if (clk_req_support) {
9930 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9931 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9932 				if (orig != data)
9933 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9934 
9935 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9936 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9937 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9938 				if (orig != data)
9939 					WREG32_SMC(THM_CLK_CNTL, data);
9940 
9941 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9942 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9943 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9944 				if (orig != data)
9945 					WREG32_SMC(MISC_CLK_CTRL, data);
9946 
9947 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9948 				data &= ~BCLK_AS_XCLK;
9949 				if (orig != data)
9950 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9951 
9952 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9953 				data &= ~FORCE_BIF_REFCLK_EN;
9954 				if (orig != data)
9955 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9956 
9957 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9958 				data &= ~MPLL_CLKOUT_SEL_MASK;
9959 				data |= MPLL_CLKOUT_SEL(4);
9960 				if (orig != data)
9961 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9962 			}
9963 		}
9964 	} else {
9965 		if (orig != data)
9966 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9967 	}
9968 
9969 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9970 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9971 	if (orig != data)
9972 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9973 
9974 	if (!disable_l0s) {
9975 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9976 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9977 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9978 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9979 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9980 				data &= ~LC_L0S_INACTIVITY_MASK;
9981 				if (orig != data)
9982 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9983 			}
9984 		}
9985 	}
9986 }
9987