xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision 3c6a73cc)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
45 
46 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
47 MODULE_FIRMWARE("radeon/bonaire_me.bin");
48 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
49 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
50 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
51 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
52 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
53 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
54 
55 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
56 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
64 
65 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
66 MODULE_FIRMWARE("radeon/hawaii_me.bin");
67 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
68 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
69 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
70 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
71 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
72 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
73 
74 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
75 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
80 
81 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
82 MODULE_FIRMWARE("radeon/kaveri_me.bin");
83 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
84 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
85 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
86 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
87 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
88 
89 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
90 MODULE_FIRMWARE("radeon/KABINI_me.bin");
91 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
92 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
93 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
94 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
95 
96 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
97 MODULE_FIRMWARE("radeon/kabini_me.bin");
98 MODULE_FIRMWARE("radeon/kabini_ce.bin");
99 MODULE_FIRMWARE("radeon/kabini_mec.bin");
100 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
101 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
102 
103 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
104 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
109 
110 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
111 MODULE_FIRMWARE("radeon/mullins_me.bin");
112 MODULE_FIRMWARE("radeon/mullins_ce.bin");
113 MODULE_FIRMWARE("radeon/mullins_mec.bin");
114 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
115 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
116 
117 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
118 extern void r600_ih_ring_fini(struct radeon_device *rdev);
119 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
120 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
121 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
122 extern void sumo_rlc_fini(struct radeon_device *rdev);
123 extern int sumo_rlc_init(struct radeon_device *rdev);
124 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
125 extern void si_rlc_reset(struct radeon_device *rdev);
126 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
127 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
128 extern int cik_sdma_resume(struct radeon_device *rdev);
129 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
130 extern void cik_sdma_fini(struct radeon_device *rdev);
131 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
132 static void cik_rlc_stop(struct radeon_device *rdev);
133 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
134 static void cik_program_aspm(struct radeon_device *rdev);
135 static void cik_init_pg(struct radeon_device *rdev);
136 static void cik_init_cg(struct radeon_device *rdev);
137 static void cik_fini_pg(struct radeon_device *rdev);
138 static void cik_fini_cg(struct radeon_device *rdev);
139 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
140 					  bool enable);
141 
142 /* get temperature in millidegrees */
143 int ci_get_temp(struct radeon_device *rdev)
144 {
145 	u32 temp;
146 	int actual_temp = 0;
147 
148 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
149 		CTF_TEMP_SHIFT;
150 
151 	if (temp & 0x200)
152 		actual_temp = 255;
153 	else
154 		actual_temp = temp & 0x1ff;
155 
156 	actual_temp = actual_temp * 1000;
157 
158 	return actual_temp;
159 }
160 
161 /* get temperature in millidegrees */
162 int kv_get_temp(struct radeon_device *rdev)
163 {
164 	u32 temp;
165 	int actual_temp = 0;
166 
167 	temp = RREG32_SMC(0xC0300E0C);
168 
169 	if (temp)
170 		actual_temp = (temp / 8) - 49;
171 	else
172 		actual_temp = 0;
173 
174 	actual_temp = actual_temp * 1000;
175 
176 	return actual_temp;
177 }
178 
179 /*
180  * Indirect registers accessor
181  */
182 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
183 {
184 	unsigned long flags;
185 	u32 r;
186 
187 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
188 	WREG32(PCIE_INDEX, reg);
189 	(void)RREG32(PCIE_INDEX);
190 	r = RREG32(PCIE_DATA);
191 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
192 	return r;
193 }
194 
195 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
196 {
197 	unsigned long flags;
198 
199 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
200 	WREG32(PCIE_INDEX, reg);
201 	(void)RREG32(PCIE_INDEX);
202 	WREG32(PCIE_DATA, v);
203 	(void)RREG32(PCIE_DATA);
204 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
205 }
206 
207 static const u32 spectre_rlc_save_restore_register_list[] =
208 {
209 	(0x0e00 << 16) | (0xc12c >> 2),
210 	0x00000000,
211 	(0x0e00 << 16) | (0xc140 >> 2),
212 	0x00000000,
213 	(0x0e00 << 16) | (0xc150 >> 2),
214 	0x00000000,
215 	(0x0e00 << 16) | (0xc15c >> 2),
216 	0x00000000,
217 	(0x0e00 << 16) | (0xc168 >> 2),
218 	0x00000000,
219 	(0x0e00 << 16) | (0xc170 >> 2),
220 	0x00000000,
221 	(0x0e00 << 16) | (0xc178 >> 2),
222 	0x00000000,
223 	(0x0e00 << 16) | (0xc204 >> 2),
224 	0x00000000,
225 	(0x0e00 << 16) | (0xc2b4 >> 2),
226 	0x00000000,
227 	(0x0e00 << 16) | (0xc2b8 >> 2),
228 	0x00000000,
229 	(0x0e00 << 16) | (0xc2bc >> 2),
230 	0x00000000,
231 	(0x0e00 << 16) | (0xc2c0 >> 2),
232 	0x00000000,
233 	(0x0e00 << 16) | (0x8228 >> 2),
234 	0x00000000,
235 	(0x0e00 << 16) | (0x829c >> 2),
236 	0x00000000,
237 	(0x0e00 << 16) | (0x869c >> 2),
238 	0x00000000,
239 	(0x0600 << 16) | (0x98f4 >> 2),
240 	0x00000000,
241 	(0x0e00 << 16) | (0x98f8 >> 2),
242 	0x00000000,
243 	(0x0e00 << 16) | (0x9900 >> 2),
244 	0x00000000,
245 	(0x0e00 << 16) | (0xc260 >> 2),
246 	0x00000000,
247 	(0x0e00 << 16) | (0x90e8 >> 2),
248 	0x00000000,
249 	(0x0e00 << 16) | (0x3c000 >> 2),
250 	0x00000000,
251 	(0x0e00 << 16) | (0x3c00c >> 2),
252 	0x00000000,
253 	(0x0e00 << 16) | (0x8c1c >> 2),
254 	0x00000000,
255 	(0x0e00 << 16) | (0x9700 >> 2),
256 	0x00000000,
257 	(0x0e00 << 16) | (0xcd20 >> 2),
258 	0x00000000,
259 	(0x4e00 << 16) | (0xcd20 >> 2),
260 	0x00000000,
261 	(0x5e00 << 16) | (0xcd20 >> 2),
262 	0x00000000,
263 	(0x6e00 << 16) | (0xcd20 >> 2),
264 	0x00000000,
265 	(0x7e00 << 16) | (0xcd20 >> 2),
266 	0x00000000,
267 	(0x8e00 << 16) | (0xcd20 >> 2),
268 	0x00000000,
269 	(0x9e00 << 16) | (0xcd20 >> 2),
270 	0x00000000,
271 	(0xae00 << 16) | (0xcd20 >> 2),
272 	0x00000000,
273 	(0xbe00 << 16) | (0xcd20 >> 2),
274 	0x00000000,
275 	(0x0e00 << 16) | (0x89bc >> 2),
276 	0x00000000,
277 	(0x0e00 << 16) | (0x8900 >> 2),
278 	0x00000000,
279 	0x3,
280 	(0x0e00 << 16) | (0xc130 >> 2),
281 	0x00000000,
282 	(0x0e00 << 16) | (0xc134 >> 2),
283 	0x00000000,
284 	(0x0e00 << 16) | (0xc1fc >> 2),
285 	0x00000000,
286 	(0x0e00 << 16) | (0xc208 >> 2),
287 	0x00000000,
288 	(0x0e00 << 16) | (0xc264 >> 2),
289 	0x00000000,
290 	(0x0e00 << 16) | (0xc268 >> 2),
291 	0x00000000,
292 	(0x0e00 << 16) | (0xc26c >> 2),
293 	0x00000000,
294 	(0x0e00 << 16) | (0xc270 >> 2),
295 	0x00000000,
296 	(0x0e00 << 16) | (0xc274 >> 2),
297 	0x00000000,
298 	(0x0e00 << 16) | (0xc278 >> 2),
299 	0x00000000,
300 	(0x0e00 << 16) | (0xc27c >> 2),
301 	0x00000000,
302 	(0x0e00 << 16) | (0xc280 >> 2),
303 	0x00000000,
304 	(0x0e00 << 16) | (0xc284 >> 2),
305 	0x00000000,
306 	(0x0e00 << 16) | (0xc288 >> 2),
307 	0x00000000,
308 	(0x0e00 << 16) | (0xc28c >> 2),
309 	0x00000000,
310 	(0x0e00 << 16) | (0xc290 >> 2),
311 	0x00000000,
312 	(0x0e00 << 16) | (0xc294 >> 2),
313 	0x00000000,
314 	(0x0e00 << 16) | (0xc298 >> 2),
315 	0x00000000,
316 	(0x0e00 << 16) | (0xc29c >> 2),
317 	0x00000000,
318 	(0x0e00 << 16) | (0xc2a0 >> 2),
319 	0x00000000,
320 	(0x0e00 << 16) | (0xc2a4 >> 2),
321 	0x00000000,
322 	(0x0e00 << 16) | (0xc2a8 >> 2),
323 	0x00000000,
324 	(0x0e00 << 16) | (0xc2ac  >> 2),
325 	0x00000000,
326 	(0x0e00 << 16) | (0xc2b0 >> 2),
327 	0x00000000,
328 	(0x0e00 << 16) | (0x301d0 >> 2),
329 	0x00000000,
330 	(0x0e00 << 16) | (0x30238 >> 2),
331 	0x00000000,
332 	(0x0e00 << 16) | (0x30250 >> 2),
333 	0x00000000,
334 	(0x0e00 << 16) | (0x30254 >> 2),
335 	0x00000000,
336 	(0x0e00 << 16) | (0x30258 >> 2),
337 	0x00000000,
338 	(0x0e00 << 16) | (0x3025c >> 2),
339 	0x00000000,
340 	(0x4e00 << 16) | (0xc900 >> 2),
341 	0x00000000,
342 	(0x5e00 << 16) | (0xc900 >> 2),
343 	0x00000000,
344 	(0x6e00 << 16) | (0xc900 >> 2),
345 	0x00000000,
346 	(0x7e00 << 16) | (0xc900 >> 2),
347 	0x00000000,
348 	(0x8e00 << 16) | (0xc900 >> 2),
349 	0x00000000,
350 	(0x9e00 << 16) | (0xc900 >> 2),
351 	0x00000000,
352 	(0xae00 << 16) | (0xc900 >> 2),
353 	0x00000000,
354 	(0xbe00 << 16) | (0xc900 >> 2),
355 	0x00000000,
356 	(0x4e00 << 16) | (0xc904 >> 2),
357 	0x00000000,
358 	(0x5e00 << 16) | (0xc904 >> 2),
359 	0x00000000,
360 	(0x6e00 << 16) | (0xc904 >> 2),
361 	0x00000000,
362 	(0x7e00 << 16) | (0xc904 >> 2),
363 	0x00000000,
364 	(0x8e00 << 16) | (0xc904 >> 2),
365 	0x00000000,
366 	(0x9e00 << 16) | (0xc904 >> 2),
367 	0x00000000,
368 	(0xae00 << 16) | (0xc904 >> 2),
369 	0x00000000,
370 	(0xbe00 << 16) | (0xc904 >> 2),
371 	0x00000000,
372 	(0x4e00 << 16) | (0xc908 >> 2),
373 	0x00000000,
374 	(0x5e00 << 16) | (0xc908 >> 2),
375 	0x00000000,
376 	(0x6e00 << 16) | (0xc908 >> 2),
377 	0x00000000,
378 	(0x7e00 << 16) | (0xc908 >> 2),
379 	0x00000000,
380 	(0x8e00 << 16) | (0xc908 >> 2),
381 	0x00000000,
382 	(0x9e00 << 16) | (0xc908 >> 2),
383 	0x00000000,
384 	(0xae00 << 16) | (0xc908 >> 2),
385 	0x00000000,
386 	(0xbe00 << 16) | (0xc908 >> 2),
387 	0x00000000,
388 	(0x4e00 << 16) | (0xc90c >> 2),
389 	0x00000000,
390 	(0x5e00 << 16) | (0xc90c >> 2),
391 	0x00000000,
392 	(0x6e00 << 16) | (0xc90c >> 2),
393 	0x00000000,
394 	(0x7e00 << 16) | (0xc90c >> 2),
395 	0x00000000,
396 	(0x8e00 << 16) | (0xc90c >> 2),
397 	0x00000000,
398 	(0x9e00 << 16) | (0xc90c >> 2),
399 	0x00000000,
400 	(0xae00 << 16) | (0xc90c >> 2),
401 	0x00000000,
402 	(0xbe00 << 16) | (0xc90c >> 2),
403 	0x00000000,
404 	(0x4e00 << 16) | (0xc910 >> 2),
405 	0x00000000,
406 	(0x5e00 << 16) | (0xc910 >> 2),
407 	0x00000000,
408 	(0x6e00 << 16) | (0xc910 >> 2),
409 	0x00000000,
410 	(0x7e00 << 16) | (0xc910 >> 2),
411 	0x00000000,
412 	(0x8e00 << 16) | (0xc910 >> 2),
413 	0x00000000,
414 	(0x9e00 << 16) | (0xc910 >> 2),
415 	0x00000000,
416 	(0xae00 << 16) | (0xc910 >> 2),
417 	0x00000000,
418 	(0xbe00 << 16) | (0xc910 >> 2),
419 	0x00000000,
420 	(0x0e00 << 16) | (0xc99c >> 2),
421 	0x00000000,
422 	(0x0e00 << 16) | (0x9834 >> 2),
423 	0x00000000,
424 	(0x0000 << 16) | (0x30f00 >> 2),
425 	0x00000000,
426 	(0x0001 << 16) | (0x30f00 >> 2),
427 	0x00000000,
428 	(0x0000 << 16) | (0x30f04 >> 2),
429 	0x00000000,
430 	(0x0001 << 16) | (0x30f04 >> 2),
431 	0x00000000,
432 	(0x0000 << 16) | (0x30f08 >> 2),
433 	0x00000000,
434 	(0x0001 << 16) | (0x30f08 >> 2),
435 	0x00000000,
436 	(0x0000 << 16) | (0x30f0c >> 2),
437 	0x00000000,
438 	(0x0001 << 16) | (0x30f0c >> 2),
439 	0x00000000,
440 	(0x0600 << 16) | (0x9b7c >> 2),
441 	0x00000000,
442 	(0x0e00 << 16) | (0x8a14 >> 2),
443 	0x00000000,
444 	(0x0e00 << 16) | (0x8a18 >> 2),
445 	0x00000000,
446 	(0x0600 << 16) | (0x30a00 >> 2),
447 	0x00000000,
448 	(0x0e00 << 16) | (0x8bf0 >> 2),
449 	0x00000000,
450 	(0x0e00 << 16) | (0x8bcc >> 2),
451 	0x00000000,
452 	(0x0e00 << 16) | (0x8b24 >> 2),
453 	0x00000000,
454 	(0x0e00 << 16) | (0x30a04 >> 2),
455 	0x00000000,
456 	(0x0600 << 16) | (0x30a10 >> 2),
457 	0x00000000,
458 	(0x0600 << 16) | (0x30a14 >> 2),
459 	0x00000000,
460 	(0x0600 << 16) | (0x30a18 >> 2),
461 	0x00000000,
462 	(0x0600 << 16) | (0x30a2c >> 2),
463 	0x00000000,
464 	(0x0e00 << 16) | (0xc700 >> 2),
465 	0x00000000,
466 	(0x0e00 << 16) | (0xc704 >> 2),
467 	0x00000000,
468 	(0x0e00 << 16) | (0xc708 >> 2),
469 	0x00000000,
470 	(0x0e00 << 16) | (0xc768 >> 2),
471 	0x00000000,
472 	(0x0400 << 16) | (0xc770 >> 2),
473 	0x00000000,
474 	(0x0400 << 16) | (0xc774 >> 2),
475 	0x00000000,
476 	(0x0400 << 16) | (0xc778 >> 2),
477 	0x00000000,
478 	(0x0400 << 16) | (0xc77c >> 2),
479 	0x00000000,
480 	(0x0400 << 16) | (0xc780 >> 2),
481 	0x00000000,
482 	(0x0400 << 16) | (0xc784 >> 2),
483 	0x00000000,
484 	(0x0400 << 16) | (0xc788 >> 2),
485 	0x00000000,
486 	(0x0400 << 16) | (0xc78c >> 2),
487 	0x00000000,
488 	(0x0400 << 16) | (0xc798 >> 2),
489 	0x00000000,
490 	(0x0400 << 16) | (0xc79c >> 2),
491 	0x00000000,
492 	(0x0400 << 16) | (0xc7a0 >> 2),
493 	0x00000000,
494 	(0x0400 << 16) | (0xc7a4 >> 2),
495 	0x00000000,
496 	(0x0400 << 16) | (0xc7a8 >> 2),
497 	0x00000000,
498 	(0x0400 << 16) | (0xc7ac >> 2),
499 	0x00000000,
500 	(0x0400 << 16) | (0xc7b0 >> 2),
501 	0x00000000,
502 	(0x0400 << 16) | (0xc7b4 >> 2),
503 	0x00000000,
504 	(0x0e00 << 16) | (0x9100 >> 2),
505 	0x00000000,
506 	(0x0e00 << 16) | (0x3c010 >> 2),
507 	0x00000000,
508 	(0x0e00 << 16) | (0x92a8 >> 2),
509 	0x00000000,
510 	(0x0e00 << 16) | (0x92ac >> 2),
511 	0x00000000,
512 	(0x0e00 << 16) | (0x92b4 >> 2),
513 	0x00000000,
514 	(0x0e00 << 16) | (0x92b8 >> 2),
515 	0x00000000,
516 	(0x0e00 << 16) | (0x92bc >> 2),
517 	0x00000000,
518 	(0x0e00 << 16) | (0x92c0 >> 2),
519 	0x00000000,
520 	(0x0e00 << 16) | (0x92c4 >> 2),
521 	0x00000000,
522 	(0x0e00 << 16) | (0x92c8 >> 2),
523 	0x00000000,
524 	(0x0e00 << 16) | (0x92cc >> 2),
525 	0x00000000,
526 	(0x0e00 << 16) | (0x92d0 >> 2),
527 	0x00000000,
528 	(0x0e00 << 16) | (0x8c00 >> 2),
529 	0x00000000,
530 	(0x0e00 << 16) | (0x8c04 >> 2),
531 	0x00000000,
532 	(0x0e00 << 16) | (0x8c20 >> 2),
533 	0x00000000,
534 	(0x0e00 << 16) | (0x8c38 >> 2),
535 	0x00000000,
536 	(0x0e00 << 16) | (0x8c3c >> 2),
537 	0x00000000,
538 	(0x0e00 << 16) | (0xae00 >> 2),
539 	0x00000000,
540 	(0x0e00 << 16) | (0x9604 >> 2),
541 	0x00000000,
542 	(0x0e00 << 16) | (0xac08 >> 2),
543 	0x00000000,
544 	(0x0e00 << 16) | (0xac0c >> 2),
545 	0x00000000,
546 	(0x0e00 << 16) | (0xac10 >> 2),
547 	0x00000000,
548 	(0x0e00 << 16) | (0xac14 >> 2),
549 	0x00000000,
550 	(0x0e00 << 16) | (0xac58 >> 2),
551 	0x00000000,
552 	(0x0e00 << 16) | (0xac68 >> 2),
553 	0x00000000,
554 	(0x0e00 << 16) | (0xac6c >> 2),
555 	0x00000000,
556 	(0x0e00 << 16) | (0xac70 >> 2),
557 	0x00000000,
558 	(0x0e00 << 16) | (0xac74 >> 2),
559 	0x00000000,
560 	(0x0e00 << 16) | (0xac78 >> 2),
561 	0x00000000,
562 	(0x0e00 << 16) | (0xac7c >> 2),
563 	0x00000000,
564 	(0x0e00 << 16) | (0xac80 >> 2),
565 	0x00000000,
566 	(0x0e00 << 16) | (0xac84 >> 2),
567 	0x00000000,
568 	(0x0e00 << 16) | (0xac88 >> 2),
569 	0x00000000,
570 	(0x0e00 << 16) | (0xac8c >> 2),
571 	0x00000000,
572 	(0x0e00 << 16) | (0x970c >> 2),
573 	0x00000000,
574 	(0x0e00 << 16) | (0x9714 >> 2),
575 	0x00000000,
576 	(0x0e00 << 16) | (0x9718 >> 2),
577 	0x00000000,
578 	(0x0e00 << 16) | (0x971c >> 2),
579 	0x00000000,
580 	(0x0e00 << 16) | (0x31068 >> 2),
581 	0x00000000,
582 	(0x4e00 << 16) | (0x31068 >> 2),
583 	0x00000000,
584 	(0x5e00 << 16) | (0x31068 >> 2),
585 	0x00000000,
586 	(0x6e00 << 16) | (0x31068 >> 2),
587 	0x00000000,
588 	(0x7e00 << 16) | (0x31068 >> 2),
589 	0x00000000,
590 	(0x8e00 << 16) | (0x31068 >> 2),
591 	0x00000000,
592 	(0x9e00 << 16) | (0x31068 >> 2),
593 	0x00000000,
594 	(0xae00 << 16) | (0x31068 >> 2),
595 	0x00000000,
596 	(0xbe00 << 16) | (0x31068 >> 2),
597 	0x00000000,
598 	(0x0e00 << 16) | (0xcd10 >> 2),
599 	0x00000000,
600 	(0x0e00 << 16) | (0xcd14 >> 2),
601 	0x00000000,
602 	(0x0e00 << 16) | (0x88b0 >> 2),
603 	0x00000000,
604 	(0x0e00 << 16) | (0x88b4 >> 2),
605 	0x00000000,
606 	(0x0e00 << 16) | (0x88b8 >> 2),
607 	0x00000000,
608 	(0x0e00 << 16) | (0x88bc >> 2),
609 	0x00000000,
610 	(0x0400 << 16) | (0x89c0 >> 2),
611 	0x00000000,
612 	(0x0e00 << 16) | (0x88c4 >> 2),
613 	0x00000000,
614 	(0x0e00 << 16) | (0x88c8 >> 2),
615 	0x00000000,
616 	(0x0e00 << 16) | (0x88d0 >> 2),
617 	0x00000000,
618 	(0x0e00 << 16) | (0x88d4 >> 2),
619 	0x00000000,
620 	(0x0e00 << 16) | (0x88d8 >> 2),
621 	0x00000000,
622 	(0x0e00 << 16) | (0x8980 >> 2),
623 	0x00000000,
624 	(0x0e00 << 16) | (0x30938 >> 2),
625 	0x00000000,
626 	(0x0e00 << 16) | (0x3093c >> 2),
627 	0x00000000,
628 	(0x0e00 << 16) | (0x30940 >> 2),
629 	0x00000000,
630 	(0x0e00 << 16) | (0x89a0 >> 2),
631 	0x00000000,
632 	(0x0e00 << 16) | (0x30900 >> 2),
633 	0x00000000,
634 	(0x0e00 << 16) | (0x30904 >> 2),
635 	0x00000000,
636 	(0x0e00 << 16) | (0x89b4 >> 2),
637 	0x00000000,
638 	(0x0e00 << 16) | (0x3c210 >> 2),
639 	0x00000000,
640 	(0x0e00 << 16) | (0x3c214 >> 2),
641 	0x00000000,
642 	(0x0e00 << 16) | (0x3c218 >> 2),
643 	0x00000000,
644 	(0x0e00 << 16) | (0x8904 >> 2),
645 	0x00000000,
646 	0x5,
647 	(0x0e00 << 16) | (0x8c28 >> 2),
648 	(0x0e00 << 16) | (0x8c2c >> 2),
649 	(0x0e00 << 16) | (0x8c30 >> 2),
650 	(0x0e00 << 16) | (0x8c34 >> 2),
651 	(0x0e00 << 16) | (0x9600 >> 2),
652 };
653 
654 static const u32 kalindi_rlc_save_restore_register_list[] =
655 {
656 	(0x0e00 << 16) | (0xc12c >> 2),
657 	0x00000000,
658 	(0x0e00 << 16) | (0xc140 >> 2),
659 	0x00000000,
660 	(0x0e00 << 16) | (0xc150 >> 2),
661 	0x00000000,
662 	(0x0e00 << 16) | (0xc15c >> 2),
663 	0x00000000,
664 	(0x0e00 << 16) | (0xc168 >> 2),
665 	0x00000000,
666 	(0x0e00 << 16) | (0xc170 >> 2),
667 	0x00000000,
668 	(0x0e00 << 16) | (0xc204 >> 2),
669 	0x00000000,
670 	(0x0e00 << 16) | (0xc2b4 >> 2),
671 	0x00000000,
672 	(0x0e00 << 16) | (0xc2b8 >> 2),
673 	0x00000000,
674 	(0x0e00 << 16) | (0xc2bc >> 2),
675 	0x00000000,
676 	(0x0e00 << 16) | (0xc2c0 >> 2),
677 	0x00000000,
678 	(0x0e00 << 16) | (0x8228 >> 2),
679 	0x00000000,
680 	(0x0e00 << 16) | (0x829c >> 2),
681 	0x00000000,
682 	(0x0e00 << 16) | (0x869c >> 2),
683 	0x00000000,
684 	(0x0600 << 16) | (0x98f4 >> 2),
685 	0x00000000,
686 	(0x0e00 << 16) | (0x98f8 >> 2),
687 	0x00000000,
688 	(0x0e00 << 16) | (0x9900 >> 2),
689 	0x00000000,
690 	(0x0e00 << 16) | (0xc260 >> 2),
691 	0x00000000,
692 	(0x0e00 << 16) | (0x90e8 >> 2),
693 	0x00000000,
694 	(0x0e00 << 16) | (0x3c000 >> 2),
695 	0x00000000,
696 	(0x0e00 << 16) | (0x3c00c >> 2),
697 	0x00000000,
698 	(0x0e00 << 16) | (0x8c1c >> 2),
699 	0x00000000,
700 	(0x0e00 << 16) | (0x9700 >> 2),
701 	0x00000000,
702 	(0x0e00 << 16) | (0xcd20 >> 2),
703 	0x00000000,
704 	(0x4e00 << 16) | (0xcd20 >> 2),
705 	0x00000000,
706 	(0x5e00 << 16) | (0xcd20 >> 2),
707 	0x00000000,
708 	(0x6e00 << 16) | (0xcd20 >> 2),
709 	0x00000000,
710 	(0x7e00 << 16) | (0xcd20 >> 2),
711 	0x00000000,
712 	(0x0e00 << 16) | (0x89bc >> 2),
713 	0x00000000,
714 	(0x0e00 << 16) | (0x8900 >> 2),
715 	0x00000000,
716 	0x3,
717 	(0x0e00 << 16) | (0xc130 >> 2),
718 	0x00000000,
719 	(0x0e00 << 16) | (0xc134 >> 2),
720 	0x00000000,
721 	(0x0e00 << 16) | (0xc1fc >> 2),
722 	0x00000000,
723 	(0x0e00 << 16) | (0xc208 >> 2),
724 	0x00000000,
725 	(0x0e00 << 16) | (0xc264 >> 2),
726 	0x00000000,
727 	(0x0e00 << 16) | (0xc268 >> 2),
728 	0x00000000,
729 	(0x0e00 << 16) | (0xc26c >> 2),
730 	0x00000000,
731 	(0x0e00 << 16) | (0xc270 >> 2),
732 	0x00000000,
733 	(0x0e00 << 16) | (0xc274 >> 2),
734 	0x00000000,
735 	(0x0e00 << 16) | (0xc28c >> 2),
736 	0x00000000,
737 	(0x0e00 << 16) | (0xc290 >> 2),
738 	0x00000000,
739 	(0x0e00 << 16) | (0xc294 >> 2),
740 	0x00000000,
741 	(0x0e00 << 16) | (0xc298 >> 2),
742 	0x00000000,
743 	(0x0e00 << 16) | (0xc2a0 >> 2),
744 	0x00000000,
745 	(0x0e00 << 16) | (0xc2a4 >> 2),
746 	0x00000000,
747 	(0x0e00 << 16) | (0xc2a8 >> 2),
748 	0x00000000,
749 	(0x0e00 << 16) | (0xc2ac >> 2),
750 	0x00000000,
751 	(0x0e00 << 16) | (0x301d0 >> 2),
752 	0x00000000,
753 	(0x0e00 << 16) | (0x30238 >> 2),
754 	0x00000000,
755 	(0x0e00 << 16) | (0x30250 >> 2),
756 	0x00000000,
757 	(0x0e00 << 16) | (0x30254 >> 2),
758 	0x00000000,
759 	(0x0e00 << 16) | (0x30258 >> 2),
760 	0x00000000,
761 	(0x0e00 << 16) | (0x3025c >> 2),
762 	0x00000000,
763 	(0x4e00 << 16) | (0xc900 >> 2),
764 	0x00000000,
765 	(0x5e00 << 16) | (0xc900 >> 2),
766 	0x00000000,
767 	(0x6e00 << 16) | (0xc900 >> 2),
768 	0x00000000,
769 	(0x7e00 << 16) | (0xc900 >> 2),
770 	0x00000000,
771 	(0x4e00 << 16) | (0xc904 >> 2),
772 	0x00000000,
773 	(0x5e00 << 16) | (0xc904 >> 2),
774 	0x00000000,
775 	(0x6e00 << 16) | (0xc904 >> 2),
776 	0x00000000,
777 	(0x7e00 << 16) | (0xc904 >> 2),
778 	0x00000000,
779 	(0x4e00 << 16) | (0xc908 >> 2),
780 	0x00000000,
781 	(0x5e00 << 16) | (0xc908 >> 2),
782 	0x00000000,
783 	(0x6e00 << 16) | (0xc908 >> 2),
784 	0x00000000,
785 	(0x7e00 << 16) | (0xc908 >> 2),
786 	0x00000000,
787 	(0x4e00 << 16) | (0xc90c >> 2),
788 	0x00000000,
789 	(0x5e00 << 16) | (0xc90c >> 2),
790 	0x00000000,
791 	(0x6e00 << 16) | (0xc90c >> 2),
792 	0x00000000,
793 	(0x7e00 << 16) | (0xc90c >> 2),
794 	0x00000000,
795 	(0x4e00 << 16) | (0xc910 >> 2),
796 	0x00000000,
797 	(0x5e00 << 16) | (0xc910 >> 2),
798 	0x00000000,
799 	(0x6e00 << 16) | (0xc910 >> 2),
800 	0x00000000,
801 	(0x7e00 << 16) | (0xc910 >> 2),
802 	0x00000000,
803 	(0x0e00 << 16) | (0xc99c >> 2),
804 	0x00000000,
805 	(0x0e00 << 16) | (0x9834 >> 2),
806 	0x00000000,
807 	(0x0000 << 16) | (0x30f00 >> 2),
808 	0x00000000,
809 	(0x0000 << 16) | (0x30f04 >> 2),
810 	0x00000000,
811 	(0x0000 << 16) | (0x30f08 >> 2),
812 	0x00000000,
813 	(0x0000 << 16) | (0x30f0c >> 2),
814 	0x00000000,
815 	(0x0600 << 16) | (0x9b7c >> 2),
816 	0x00000000,
817 	(0x0e00 << 16) | (0x8a14 >> 2),
818 	0x00000000,
819 	(0x0e00 << 16) | (0x8a18 >> 2),
820 	0x00000000,
821 	(0x0600 << 16) | (0x30a00 >> 2),
822 	0x00000000,
823 	(0x0e00 << 16) | (0x8bf0 >> 2),
824 	0x00000000,
825 	(0x0e00 << 16) | (0x8bcc >> 2),
826 	0x00000000,
827 	(0x0e00 << 16) | (0x8b24 >> 2),
828 	0x00000000,
829 	(0x0e00 << 16) | (0x30a04 >> 2),
830 	0x00000000,
831 	(0x0600 << 16) | (0x30a10 >> 2),
832 	0x00000000,
833 	(0x0600 << 16) | (0x30a14 >> 2),
834 	0x00000000,
835 	(0x0600 << 16) | (0x30a18 >> 2),
836 	0x00000000,
837 	(0x0600 << 16) | (0x30a2c >> 2),
838 	0x00000000,
839 	(0x0e00 << 16) | (0xc700 >> 2),
840 	0x00000000,
841 	(0x0e00 << 16) | (0xc704 >> 2),
842 	0x00000000,
843 	(0x0e00 << 16) | (0xc708 >> 2),
844 	0x00000000,
845 	(0x0e00 << 16) | (0xc768 >> 2),
846 	0x00000000,
847 	(0x0400 << 16) | (0xc770 >> 2),
848 	0x00000000,
849 	(0x0400 << 16) | (0xc774 >> 2),
850 	0x00000000,
851 	(0x0400 << 16) | (0xc798 >> 2),
852 	0x00000000,
853 	(0x0400 << 16) | (0xc79c >> 2),
854 	0x00000000,
855 	(0x0e00 << 16) | (0x9100 >> 2),
856 	0x00000000,
857 	(0x0e00 << 16) | (0x3c010 >> 2),
858 	0x00000000,
859 	(0x0e00 << 16) | (0x8c00 >> 2),
860 	0x00000000,
861 	(0x0e00 << 16) | (0x8c04 >> 2),
862 	0x00000000,
863 	(0x0e00 << 16) | (0x8c20 >> 2),
864 	0x00000000,
865 	(0x0e00 << 16) | (0x8c38 >> 2),
866 	0x00000000,
867 	(0x0e00 << 16) | (0x8c3c >> 2),
868 	0x00000000,
869 	(0x0e00 << 16) | (0xae00 >> 2),
870 	0x00000000,
871 	(0x0e00 << 16) | (0x9604 >> 2),
872 	0x00000000,
873 	(0x0e00 << 16) | (0xac08 >> 2),
874 	0x00000000,
875 	(0x0e00 << 16) | (0xac0c >> 2),
876 	0x00000000,
877 	(0x0e00 << 16) | (0xac10 >> 2),
878 	0x00000000,
879 	(0x0e00 << 16) | (0xac14 >> 2),
880 	0x00000000,
881 	(0x0e00 << 16) | (0xac58 >> 2),
882 	0x00000000,
883 	(0x0e00 << 16) | (0xac68 >> 2),
884 	0x00000000,
885 	(0x0e00 << 16) | (0xac6c >> 2),
886 	0x00000000,
887 	(0x0e00 << 16) | (0xac70 >> 2),
888 	0x00000000,
889 	(0x0e00 << 16) | (0xac74 >> 2),
890 	0x00000000,
891 	(0x0e00 << 16) | (0xac78 >> 2),
892 	0x00000000,
893 	(0x0e00 << 16) | (0xac7c >> 2),
894 	0x00000000,
895 	(0x0e00 << 16) | (0xac80 >> 2),
896 	0x00000000,
897 	(0x0e00 << 16) | (0xac84 >> 2),
898 	0x00000000,
899 	(0x0e00 << 16) | (0xac88 >> 2),
900 	0x00000000,
901 	(0x0e00 << 16) | (0xac8c >> 2),
902 	0x00000000,
903 	(0x0e00 << 16) | (0x970c >> 2),
904 	0x00000000,
905 	(0x0e00 << 16) | (0x9714 >> 2),
906 	0x00000000,
907 	(0x0e00 << 16) | (0x9718 >> 2),
908 	0x00000000,
909 	(0x0e00 << 16) | (0x971c >> 2),
910 	0x00000000,
911 	(0x0e00 << 16) | (0x31068 >> 2),
912 	0x00000000,
913 	(0x4e00 << 16) | (0x31068 >> 2),
914 	0x00000000,
915 	(0x5e00 << 16) | (0x31068 >> 2),
916 	0x00000000,
917 	(0x6e00 << 16) | (0x31068 >> 2),
918 	0x00000000,
919 	(0x7e00 << 16) | (0x31068 >> 2),
920 	0x00000000,
921 	(0x0e00 << 16) | (0xcd10 >> 2),
922 	0x00000000,
923 	(0x0e00 << 16) | (0xcd14 >> 2),
924 	0x00000000,
925 	(0x0e00 << 16) | (0x88b0 >> 2),
926 	0x00000000,
927 	(0x0e00 << 16) | (0x88b4 >> 2),
928 	0x00000000,
929 	(0x0e00 << 16) | (0x88b8 >> 2),
930 	0x00000000,
931 	(0x0e00 << 16) | (0x88bc >> 2),
932 	0x00000000,
933 	(0x0400 << 16) | (0x89c0 >> 2),
934 	0x00000000,
935 	(0x0e00 << 16) | (0x88c4 >> 2),
936 	0x00000000,
937 	(0x0e00 << 16) | (0x88c8 >> 2),
938 	0x00000000,
939 	(0x0e00 << 16) | (0x88d0 >> 2),
940 	0x00000000,
941 	(0x0e00 << 16) | (0x88d4 >> 2),
942 	0x00000000,
943 	(0x0e00 << 16) | (0x88d8 >> 2),
944 	0x00000000,
945 	(0x0e00 << 16) | (0x8980 >> 2),
946 	0x00000000,
947 	(0x0e00 << 16) | (0x30938 >> 2),
948 	0x00000000,
949 	(0x0e00 << 16) | (0x3093c >> 2),
950 	0x00000000,
951 	(0x0e00 << 16) | (0x30940 >> 2),
952 	0x00000000,
953 	(0x0e00 << 16) | (0x89a0 >> 2),
954 	0x00000000,
955 	(0x0e00 << 16) | (0x30900 >> 2),
956 	0x00000000,
957 	(0x0e00 << 16) | (0x30904 >> 2),
958 	0x00000000,
959 	(0x0e00 << 16) | (0x89b4 >> 2),
960 	0x00000000,
961 	(0x0e00 << 16) | (0x3e1fc >> 2),
962 	0x00000000,
963 	(0x0e00 << 16) | (0x3c210 >> 2),
964 	0x00000000,
965 	(0x0e00 << 16) | (0x3c214 >> 2),
966 	0x00000000,
967 	(0x0e00 << 16) | (0x3c218 >> 2),
968 	0x00000000,
969 	(0x0e00 << 16) | (0x8904 >> 2),
970 	0x00000000,
971 	0x5,
972 	(0x0e00 << 16) | (0x8c28 >> 2),
973 	(0x0e00 << 16) | (0x8c2c >> 2),
974 	(0x0e00 << 16) | (0x8c30 >> 2),
975 	(0x0e00 << 16) | (0x8c34 >> 2),
976 	(0x0e00 << 16) | (0x9600 >> 2),
977 };
978 
979 static const u32 bonaire_golden_spm_registers[] =
980 {
981 	0x30800, 0xe0ffffff, 0xe0000000
982 };
983 
984 static const u32 bonaire_golden_common_registers[] =
985 {
986 	0xc770, 0xffffffff, 0x00000800,
987 	0xc774, 0xffffffff, 0x00000800,
988 	0xc798, 0xffffffff, 0x00007fbf,
989 	0xc79c, 0xffffffff, 0x00007faf
990 };
991 
992 static const u32 bonaire_golden_registers[] =
993 {
994 	0x3354, 0x00000333, 0x00000333,
995 	0x3350, 0x000c0fc0, 0x00040200,
996 	0x9a10, 0x00010000, 0x00058208,
997 	0x3c000, 0xffff1fff, 0x00140000,
998 	0x3c200, 0xfdfc0fff, 0x00000100,
999 	0x3c234, 0x40000000, 0x40000200,
1000 	0x9830, 0xffffffff, 0x00000000,
1001 	0x9834, 0xf00fffff, 0x00000400,
1002 	0x9838, 0x0002021c, 0x00020200,
1003 	0xc78, 0x00000080, 0x00000000,
1004 	0x5bb0, 0x000000f0, 0x00000070,
1005 	0x5bc0, 0xf0311fff, 0x80300000,
1006 	0x98f8, 0x73773777, 0x12010001,
1007 	0x350c, 0x00810000, 0x408af000,
1008 	0x7030, 0x31000111, 0x00000011,
1009 	0x2f48, 0x73773777, 0x12010001,
1010 	0x220c, 0x00007fb6, 0x0021a1b1,
1011 	0x2210, 0x00007fb6, 0x002021b1,
1012 	0x2180, 0x00007fb6, 0x00002191,
1013 	0x2218, 0x00007fb6, 0x002121b1,
1014 	0x221c, 0x00007fb6, 0x002021b1,
1015 	0x21dc, 0x00007fb6, 0x00002191,
1016 	0x21e0, 0x00007fb6, 0x00002191,
1017 	0x3628, 0x0000003f, 0x0000000a,
1018 	0x362c, 0x0000003f, 0x0000000a,
1019 	0x2ae4, 0x00073ffe, 0x000022a2,
1020 	0x240c, 0x000007ff, 0x00000000,
1021 	0x8a14, 0xf000003f, 0x00000007,
1022 	0x8bf0, 0x00002001, 0x00000001,
1023 	0x8b24, 0xffffffff, 0x00ffffff,
1024 	0x30a04, 0x0000ff0f, 0x00000000,
1025 	0x28a4c, 0x07ffffff, 0x06000000,
1026 	0x4d8, 0x00000fff, 0x00000100,
1027 	0x3e78, 0x00000001, 0x00000002,
1028 	0x9100, 0x03000000, 0x0362c688,
1029 	0x8c00, 0x000000ff, 0x00000001,
1030 	0xe40, 0x00001fff, 0x00001fff,
1031 	0x9060, 0x0000007f, 0x00000020,
1032 	0x9508, 0x00010000, 0x00010000,
1033 	0xac14, 0x000003ff, 0x000000f3,
1034 	0xac0c, 0xffffffff, 0x00001032
1035 };
1036 
/*
 * Register sequence for Bonaire.  cik_init_golden_registers() passes this
 * table to radeon_program_register_sequence() as the first of the four
 * sequences programmed for CHIP_BONAIRE.
 *
 * Values are laid out three per row.
 * NOTE(review): presumably (register offset, mask, value) triples as
 * consumed by radeon_program_register_sequence() - confirm against that
 * helper.
 *
 * Registers 0x30800 and 0x55e4 each appear twice in the sequence; the
 * 0x3c020-0x3c0a8 rows repeat the same group of five values.
 */
1037 static const u32 bonaire_mgcg_cgcg_init[] =
1038 {
1039 	0xc420, 0xffffffff, 0xfffffffc,
1040 	0x30800, 0xffffffff, 0xe0000000,
1041 	0x3c2a0, 0xffffffff, 0x00000100,
1042 	0x3c208, 0xffffffff, 0x00000100,
1043 	0x3c2c0, 0xffffffff, 0xc0000100,
1044 	0x3c2c8, 0xffffffff, 0xc0000100,
1045 	0x3c2c4, 0xffffffff, 0xc0000100,
1046 	0x55e4, 0xffffffff, 0x00600100,
1047 	0x3c280, 0xffffffff, 0x00000100,
1048 	0x3c214, 0xffffffff, 0x06000100,
1049 	0x3c220, 0xffffffff, 0x00000100,
1050 	0x3c218, 0xffffffff, 0x06000100,
1051 	0x3c204, 0xffffffff, 0x00000100,
1052 	0x3c2e0, 0xffffffff, 0x00000100,
1053 	0x3c224, 0xffffffff, 0x00000100,
1054 	0x3c200, 0xffffffff, 0x00000100,
1055 	0x3c230, 0xffffffff, 0x00000100,
1056 	0x3c234, 0xffffffff, 0x00000100,
1057 	0x3c250, 0xffffffff, 0x00000100,
1058 	0x3c254, 0xffffffff, 0x00000100,
1059 	0x3c258, 0xffffffff, 0x00000100,
1060 	0x3c25c, 0xffffffff, 0x00000100,
1061 	0x3c260, 0xffffffff, 0x00000100,
1062 	0x3c27c, 0xffffffff, 0x00000100,
1063 	0x3c278, 0xffffffff, 0x00000100,
1064 	0x3c210, 0xffffffff, 0x06000100,
1065 	0x3c290, 0xffffffff, 0x00000100,
1066 	0x3c274, 0xffffffff, 0x00000100,
1067 	0x3c2b4, 0xffffffff, 0x00000100,
1068 	0x3c2b0, 0xffffffff, 0x00000100,
1069 	0x3c270, 0xffffffff, 0x00000100,
1070 	0x30800, 0xffffffff, 0xe0000000,
1071 	0x3c020, 0xffffffff, 0x00010000,
1072 	0x3c024, 0xffffffff, 0x00030002,
1073 	0x3c028, 0xffffffff, 0x00040007,
1074 	0x3c02c, 0xffffffff, 0x00060005,
1075 	0x3c030, 0xffffffff, 0x00090008,
1076 	0x3c034, 0xffffffff, 0x00010000,
1077 	0x3c038, 0xffffffff, 0x00030002,
1078 	0x3c03c, 0xffffffff, 0x00040007,
1079 	0x3c040, 0xffffffff, 0x00060005,
1080 	0x3c044, 0xffffffff, 0x00090008,
1081 	0x3c048, 0xffffffff, 0x00010000,
1082 	0x3c04c, 0xffffffff, 0x00030002,
1083 	0x3c050, 0xffffffff, 0x00040007,
1084 	0x3c054, 0xffffffff, 0x00060005,
1085 	0x3c058, 0xffffffff, 0x00090008,
1086 	0x3c05c, 0xffffffff, 0x00010000,
1087 	0x3c060, 0xffffffff, 0x00030002,
1088 	0x3c064, 0xffffffff, 0x00040007,
1089 	0x3c068, 0xffffffff, 0x00060005,
1090 	0x3c06c, 0xffffffff, 0x00090008,
1091 	0x3c070, 0xffffffff, 0x00010000,
1092 	0x3c074, 0xffffffff, 0x00030002,
1093 	0x3c078, 0xffffffff, 0x00040007,
1094 	0x3c07c, 0xffffffff, 0x00060005,
1095 	0x3c080, 0xffffffff, 0x00090008,
1096 	0x3c084, 0xffffffff, 0x00010000,
1097 	0x3c088, 0xffffffff, 0x00030002,
1098 	0x3c08c, 0xffffffff, 0x00040007,
1099 	0x3c090, 0xffffffff, 0x00060005,
1100 	0x3c094, 0xffffffff, 0x00090008,
1101 	0x3c098, 0xffffffff, 0x00010000,
1102 	0x3c09c, 0xffffffff, 0x00030002,
1103 	0x3c0a0, 0xffffffff, 0x00040007,
1104 	0x3c0a4, 0xffffffff, 0x00060005,
1105 	0x3c0a8, 0xffffffff, 0x00090008,
1106 	0x3c000, 0xffffffff, 0x96e00200,
1107 	0x8708, 0xffffffff, 0x00900100,
1108 	0xc424, 0xffffffff, 0x0020003f,
1109 	0x38, 0xffffffff, 0x0140001c,
1110 	0x3c, 0x000f0000, 0x000f0000,
1111 	0x220, 0xffffffff, 0xC060000C,
1112 	0x224, 0xc0000fff, 0x00000100,
1113 	0xf90, 0xffffffff, 0x00000100,
1114 	0xf98, 0x00000101, 0x00000000,
1115 	0x20a8, 0xffffffff, 0x00000104,
1116 	0x55e4, 0xff000fff, 0x00000100,
1117 	0x30cc, 0xc0000fff, 0x00000104,
1118 	0xc1e4, 0x00000001, 0x00000001,
1119 	0xd00c, 0xff000ff0, 0x00000100,
1120 	0xd80c, 0xff000ff0, 0x00000100
1121 };
1122 
/*
 * Single-row register sequence (register 0x30800) for Spectre.
 * cik_init_golden_registers() programs it last of the four sequences
 * used for CHIP_KAVERI.
 * NOTE(review): row is presumably (register offset, mask, value) - confirm
 * against radeon_program_register_sequence().
 */
1123 static const u32 spectre_golden_spm_registers[] =
1124 {
1125 	0x30800, 0xe0ffffff, 0xe0000000
1126 };
1127 
/*
 * Register sequence for Spectre covering 0xc770, 0xc774, 0xc798 and 0xc79c.
 * cik_init_golden_registers() programs it third of the four sequences
 * used for CHIP_KAVERI.
 * NOTE(review): rows are presumably (register offset, mask, value) -
 * confirm against radeon_program_register_sequence().
 */
1128 static const u32 spectre_golden_common_registers[] =
1129 {
1130 	0xc770, 0xffffffff, 0x00000800,
1131 	0xc774, 0xffffffff, 0x00000800,
1132 	0xc798, 0xffffffff, 0x00007fbf,
1133 	0xc79c, 0xffffffff, 0x00007faf
1134 };
1135 
/*
 * Register sequence for Spectre.  cik_init_golden_registers() programs it
 * second of the four sequences used for CHIP_KAVERI, i.e. after
 * spectre_mgcg_cgcg_init and before the common/spm tables.
 * NOTE(review): rows are presumably (register offset, mask, value) -
 * confirm against radeon_program_register_sequence().
 */
1136 static const u32 spectre_golden_registers[] =
1137 {
1138 	0x3c000, 0xffff1fff, 0x96940200,
1139 	0x3c00c, 0xffff0001, 0xff000000,
1140 	0x3c200, 0xfffc0fff, 0x00000100,
1141 	0x6ed8, 0x00010101, 0x00010000,
1142 	0x9834, 0xf00fffff, 0x00000400,
1143 	0x9838, 0xfffffffc, 0x00020200,
1144 	0x5bb0, 0x000000f0, 0x00000070,
1145 	0x5bc0, 0xf0311fff, 0x80300000,
1146 	0x98f8, 0x73773777, 0x12010001,
1147 	0x9b7c, 0x00ff0000, 0x00fc0000,
1148 	0x2f48, 0x73773777, 0x12010001,
1149 	0x8a14, 0xf000003f, 0x00000007,
1150 	0x8b24, 0xffffffff, 0x00ffffff,
1151 	0x28350, 0x3f3f3fff, 0x00000082,
1152 	0x28354, 0x0000003f, 0x00000000,
1153 	0x3e78, 0x00000001, 0x00000002,
1154 	0x913c, 0xffff03df, 0x00000004,
1155 	0xc768, 0x00000008, 0x00000008,
1156 	0x8c00, 0x000008ff, 0x00000800,
1157 	0x9508, 0x00010000, 0x00010000,
1158 	0xac0c, 0xffffffff, 0x54763210,
1159 	0x214f8, 0x01ff01ff, 0x00000002,
1160 	0x21498, 0x007ff800, 0x00200000,
1161 	0x2015c, 0xffffffff, 0x00000f40,
1162 	0x30934, 0xffffffff, 0x00000001
1163 };
1164 
/*
 * Register sequence for Spectre.  cik_init_golden_registers() passes this
 * table to radeon_program_register_sequence() as the first of the four
 * sequences programmed for CHIP_KAVERI.
 *
 * Values are laid out three per row.
 * NOTE(review): presumably (register offset, mask, value) triples as
 * consumed by radeon_program_register_sequence() - confirm against that
 * helper.
 *
 * Registers 0x30800 and 0x55e4 each appear twice in the sequence; the
 * 0x3c020-0x3c0bc rows repeat the same group of five values.
 */
1165 static const u32 spectre_mgcg_cgcg_init[] =
1166 {
1167 	0xc420, 0xffffffff, 0xfffffffc,
1168 	0x30800, 0xffffffff, 0xe0000000,
1169 	0x3c2a0, 0xffffffff, 0x00000100,
1170 	0x3c208, 0xffffffff, 0x00000100,
1171 	0x3c2c0, 0xffffffff, 0x00000100,
1172 	0x3c2c8, 0xffffffff, 0x00000100,
1173 	0x3c2c4, 0xffffffff, 0x00000100,
1174 	0x55e4, 0xffffffff, 0x00600100,
1175 	0x3c280, 0xffffffff, 0x00000100,
1176 	0x3c214, 0xffffffff, 0x06000100,
1177 	0x3c220, 0xffffffff, 0x00000100,
1178 	0x3c218, 0xffffffff, 0x06000100,
1179 	0x3c204, 0xffffffff, 0x00000100,
1180 	0x3c2e0, 0xffffffff, 0x00000100,
1181 	0x3c224, 0xffffffff, 0x00000100,
1182 	0x3c200, 0xffffffff, 0x00000100,
1183 	0x3c230, 0xffffffff, 0x00000100,
1184 	0x3c234, 0xffffffff, 0x00000100,
1185 	0x3c250, 0xffffffff, 0x00000100,
1186 	0x3c254, 0xffffffff, 0x00000100,
1187 	0x3c258, 0xffffffff, 0x00000100,
1188 	0x3c25c, 0xffffffff, 0x00000100,
1189 	0x3c260, 0xffffffff, 0x00000100,
1190 	0x3c27c, 0xffffffff, 0x00000100,
1191 	0x3c278, 0xffffffff, 0x00000100,
1192 	0x3c210, 0xffffffff, 0x06000100,
1193 	0x3c290, 0xffffffff, 0x00000100,
1194 	0x3c274, 0xffffffff, 0x00000100,
1195 	0x3c2b4, 0xffffffff, 0x00000100,
1196 	0x3c2b0, 0xffffffff, 0x00000100,
1197 	0x3c270, 0xffffffff, 0x00000100,
1198 	0x30800, 0xffffffff, 0xe0000000,
1199 	0x3c020, 0xffffffff, 0x00010000,
1200 	0x3c024, 0xffffffff, 0x00030002,
1201 	0x3c028, 0xffffffff, 0x00040007,
1202 	0x3c02c, 0xffffffff, 0x00060005,
1203 	0x3c030, 0xffffffff, 0x00090008,
1204 	0x3c034, 0xffffffff, 0x00010000,
1205 	0x3c038, 0xffffffff, 0x00030002,
1206 	0x3c03c, 0xffffffff, 0x00040007,
1207 	0x3c040, 0xffffffff, 0x00060005,
1208 	0x3c044, 0xffffffff, 0x00090008,
1209 	0x3c048, 0xffffffff, 0x00010000,
1210 	0x3c04c, 0xffffffff, 0x00030002,
1211 	0x3c050, 0xffffffff, 0x00040007,
1212 	0x3c054, 0xffffffff, 0x00060005,
1213 	0x3c058, 0xffffffff, 0x00090008,
1214 	0x3c05c, 0xffffffff, 0x00010000,
1215 	0x3c060, 0xffffffff, 0x00030002,
1216 	0x3c064, 0xffffffff, 0x00040007,
1217 	0x3c068, 0xffffffff, 0x00060005,
1218 	0x3c06c, 0xffffffff, 0x00090008,
1219 	0x3c070, 0xffffffff, 0x00010000,
1220 	0x3c074, 0xffffffff, 0x00030002,
1221 	0x3c078, 0xffffffff, 0x00040007,
1222 	0x3c07c, 0xffffffff, 0x00060005,
1223 	0x3c080, 0xffffffff, 0x00090008,
1224 	0x3c084, 0xffffffff, 0x00010000,
1225 	0x3c088, 0xffffffff, 0x00030002,
1226 	0x3c08c, 0xffffffff, 0x00040007,
1227 	0x3c090, 0xffffffff, 0x00060005,
1228 	0x3c094, 0xffffffff, 0x00090008,
1229 	0x3c098, 0xffffffff, 0x00010000,
1230 	0x3c09c, 0xffffffff, 0x00030002,
1231 	0x3c0a0, 0xffffffff, 0x00040007,
1232 	0x3c0a4, 0xffffffff, 0x00060005,
1233 	0x3c0a8, 0xffffffff, 0x00090008,
1234 	0x3c0ac, 0xffffffff, 0x00010000,
1235 	0x3c0b0, 0xffffffff, 0x00030002,
1236 	0x3c0b4, 0xffffffff, 0x00040007,
1237 	0x3c0b8, 0xffffffff, 0x00060005,
1238 	0x3c0bc, 0xffffffff, 0x00090008,
1239 	0x3c000, 0xffffffff, 0x96e00200,
1240 	0x8708, 0xffffffff, 0x00900100,
1241 	0xc424, 0xffffffff, 0x0020003f,
1242 	0x38, 0xffffffff, 0x0140001c,
1243 	0x3c, 0x000f0000, 0x000f0000,
1244 	0x220, 0xffffffff, 0xC060000C,
1245 	0x224, 0xc0000fff, 0x00000100,
1246 	0xf90, 0xffffffff, 0x00000100,
1247 	0xf98, 0x00000101, 0x00000000,
1248 	0x20a8, 0xffffffff, 0x00000104,
1249 	0x55e4, 0xff000fff, 0x00000100,
1250 	0x30cc, 0xc0000fff, 0x00000104,
1251 	0xc1e4, 0x00000001, 0x00000001,
1252 	0xd00c, 0xff000ff0, 0x00000100,
1253 	0xd80c, 0xff000ff0, 0x00000100
1254 };
1255 
/*
 * Single-row register sequence (register 0x30800) for Kalindi.
 * cik_init_golden_registers() programs it last of the four sequences
 * used for both CHIP_KABINI and CHIP_MULLINS.
 * NOTE(review): row is presumably (register offset, mask, value) - confirm
 * against radeon_program_register_sequence().
 */
1256 static const u32 kalindi_golden_spm_registers[] =
1257 {
1258 	0x30800, 0xe0ffffff, 0xe0000000
1259 };
1260 
/*
 * Register sequence for Kalindi covering 0xc770, 0xc774, 0xc798 and 0xc79c.
 * cik_init_golden_registers() programs it third of the four sequences
 * used for both CHIP_KABINI and CHIP_MULLINS.
 * NOTE(review): rows are presumably (register offset, mask, value) -
 * confirm against radeon_program_register_sequence().
 */
1261 static const u32 kalindi_golden_common_registers[] =
1262 {
1263 	0xc770, 0xffffffff, 0x00000800,
1264 	0xc774, 0xffffffff, 0x00000800,
1265 	0xc798, 0xffffffff, 0x00007fbf,
1266 	0xc79c, 0xffffffff, 0x00007faf
1267 };
1268 
/*
 * Register sequence for Kalindi.  cik_init_golden_registers() programs it
 * second of the four sequences used for CHIP_KABINI (CHIP_MULLINS uses
 * godavari_golden_registers in this slot instead).
 * NOTE(review): rows are presumably (register offset, mask, value) -
 * confirm against radeon_program_register_sequence().
 */
1269 static const u32 kalindi_golden_registers[] =
1270 {
1271 	0x3c000, 0xffffdfff, 0x6e944040,
1272 	0x55e4, 0xff607fff, 0xfc000100,
1273 	0x3c220, 0xff000fff, 0x00000100,
1274 	0x3c224, 0xff000fff, 0x00000100,
1275 	0x3c200, 0xfffc0fff, 0x00000100,
1276 	0x6ed8, 0x00010101, 0x00010000,
1277 	0x9830, 0xffffffff, 0x00000000,
1278 	0x9834, 0xf00fffff, 0x00000400,
1279 	0x5bb0, 0x000000f0, 0x00000070,
1280 	0x5bc0, 0xf0311fff, 0x80300000,
1281 	0x98f8, 0x73773777, 0x12010001,
1282 	0x98fc, 0xffffffff, 0x00000010,
1283 	0x9b7c, 0x00ff0000, 0x00fc0000,
1284 	0x8030, 0x00001f0f, 0x0000100a,
1285 	0x2f48, 0x73773777, 0x12010001,
1286 	0x2408, 0x000fffff, 0x000c007f,
1287 	0x8a14, 0xf000003f, 0x00000007,
1288 	0x8b24, 0x3fff3fff, 0x00ffcfff,
1289 	0x30a04, 0x0000ff0f, 0x00000000,
1290 	0x28a4c, 0x07ffffff, 0x06000000,
1291 	0x4d8, 0x00000fff, 0x00000100,
1292 	0x3e78, 0x00000001, 0x00000002,
1293 	0xc768, 0x00000008, 0x00000008,
1294 	0x8c00, 0x000000ff, 0x00000003,
1295 	0x214f8, 0x01ff01ff, 0x00000002,
1296 	0x21498, 0x007ff800, 0x00200000,
1297 	0x2015c, 0xffffffff, 0x00000f40,
1298 	0x88c4, 0x001f3ae3, 0x00000082,
1299 	0x88d4, 0x0000001f, 0x00000010,
1300 	0x30934, 0xffffffff, 0x00000000
1301 };
1302 
/*
 * Register sequence for Kalindi.  cik_init_golden_registers() passes this
 * table to radeon_program_register_sequence() as the first of the four
 * sequences programmed for both CHIP_KABINI and CHIP_MULLINS.
 *
 * Values are laid out three per row.
 * NOTE(review): presumably (register offset, mask, value) triples as
 * consumed by radeon_program_register_sequence() - confirm against that
 * helper.
 *
 * Registers 0x30800 and 0x55e4 each appear twice in the sequence; the
 * 0x3c020-0x3c044 rows repeat the same group of five values.
 */
1303 static const u32 kalindi_mgcg_cgcg_init[] =
1304 {
1305 	0xc420, 0xffffffff, 0xfffffffc,
1306 	0x30800, 0xffffffff, 0xe0000000,
1307 	0x3c2a0, 0xffffffff, 0x00000100,
1308 	0x3c208, 0xffffffff, 0x00000100,
1309 	0x3c2c0, 0xffffffff, 0x00000100,
1310 	0x3c2c8, 0xffffffff, 0x00000100,
1311 	0x3c2c4, 0xffffffff, 0x00000100,
1312 	0x55e4, 0xffffffff, 0x00600100,
1313 	0x3c280, 0xffffffff, 0x00000100,
1314 	0x3c214, 0xffffffff, 0x06000100,
1315 	0x3c220, 0xffffffff, 0x00000100,
1316 	0x3c218, 0xffffffff, 0x06000100,
1317 	0x3c204, 0xffffffff, 0x00000100,
1318 	0x3c2e0, 0xffffffff, 0x00000100,
1319 	0x3c224, 0xffffffff, 0x00000100,
1320 	0x3c200, 0xffffffff, 0x00000100,
1321 	0x3c230, 0xffffffff, 0x00000100,
1322 	0x3c234, 0xffffffff, 0x00000100,
1323 	0x3c250, 0xffffffff, 0x00000100,
1324 	0x3c254, 0xffffffff, 0x00000100,
1325 	0x3c258, 0xffffffff, 0x00000100,
1326 	0x3c25c, 0xffffffff, 0x00000100,
1327 	0x3c260, 0xffffffff, 0x00000100,
1328 	0x3c27c, 0xffffffff, 0x00000100,
1329 	0x3c278, 0xffffffff, 0x00000100,
1330 	0x3c210, 0xffffffff, 0x06000100,
1331 	0x3c290, 0xffffffff, 0x00000100,
1332 	0x3c274, 0xffffffff, 0x00000100,
1333 	0x3c2b4, 0xffffffff, 0x00000100,
1334 	0x3c2b0, 0xffffffff, 0x00000100,
1335 	0x3c270, 0xffffffff, 0x00000100,
1336 	0x30800, 0xffffffff, 0xe0000000,
1337 	0x3c020, 0xffffffff, 0x00010000,
1338 	0x3c024, 0xffffffff, 0x00030002,
1339 	0x3c028, 0xffffffff, 0x00040007,
1340 	0x3c02c, 0xffffffff, 0x00060005,
1341 	0x3c030, 0xffffffff, 0x00090008,
1342 	0x3c034, 0xffffffff, 0x00010000,
1343 	0x3c038, 0xffffffff, 0x00030002,
1344 	0x3c03c, 0xffffffff, 0x00040007,
1345 	0x3c040, 0xffffffff, 0x00060005,
1346 	0x3c044, 0xffffffff, 0x00090008,
1347 	0x3c000, 0xffffffff, 0x96e00200,
1348 	0x8708, 0xffffffff, 0x00900100,
1349 	0xc424, 0xffffffff, 0x0020003f,
1350 	0x38, 0xffffffff, 0x0140001c,
1351 	0x3c, 0x000f0000, 0x000f0000,
1352 	0x220, 0xffffffff, 0xC060000C,
1353 	0x224, 0xc0000fff, 0x00000100,
1354 	0x20a8, 0xffffffff, 0x00000104,
1355 	0x55e4, 0xff000fff, 0x00000100,
1356 	0x30cc, 0xc0000fff, 0x00000104,
1357 	0xc1e4, 0x00000001, 0x00000001,
1358 	0xd00c, 0xff000ff0, 0x00000100,
1359 	0xd80c, 0xff000ff0, 0x00000100
1360 };
1361 
/*
 * Single-row register sequence (register 0x30800) for Hawaii.
 * cik_init_golden_registers() programs it last of the four sequences
 * used for CHIP_HAWAII.
 * NOTE(review): row is presumably (register offset, mask, value) - confirm
 * against radeon_program_register_sequence().
 */
1362 static const u32 hawaii_golden_spm_registers[] =
1363 {
1364 	0x30800, 0xe0ffffff, 0xe0000000
1365 };
1366 
/*
 * Register sequence for Hawaii.  cik_init_golden_registers() programs it
 * third of the four sequences used for CHIP_HAWAII.
 * NOTE(review): rows are presumably (register offset, mask, value) -
 * confirm against radeon_program_register_sequence().
 */
1367 static const u32 hawaii_golden_common_registers[] =
1368 {
1369 	0x30800, 0xffffffff, 0xe0000000,
1370 	0x28350, 0xffffffff, 0x3a00161a,
1371 	0x28354, 0xffffffff, 0x0000002e,
1372 	0x9a10, 0xffffffff, 0x00018208,
1373 	0x98f8, 0xffffffff, 0x12011003
1374 };
1375 
/*
 * Register sequence for Hawaii.  cik_init_golden_registers() programs it
 * second of the four sequences used for CHIP_HAWAII, i.e. after
 * hawaii_mgcg_cgcg_init and before the common/spm tables.
 * NOTE(review): rows are presumably (register offset, mask, value) -
 * confirm against radeon_program_register_sequence().
 */
1376 static const u32 hawaii_golden_registers[] =
1377 {
1378 	0x3354, 0x00000333, 0x00000333,
1379 	0x9a10, 0x00010000, 0x00058208,
1380 	0x9830, 0xffffffff, 0x00000000,
1381 	0x9834, 0xf00fffff, 0x00000400,
1382 	0x9838, 0x0002021c, 0x00020200,
1383 	0xc78, 0x00000080, 0x00000000,
1384 	0x5bb0, 0x000000f0, 0x00000070,
1385 	0x5bc0, 0xf0311fff, 0x80300000,
1386 	0x350c, 0x00810000, 0x408af000,
1387 	0x7030, 0x31000111, 0x00000011,
1388 	0x2f48, 0x73773777, 0x12010001,
1389 	0x2120, 0x0000007f, 0x0000001b,
1390 	0x21dc, 0x00007fb6, 0x00002191,
1391 	0x3628, 0x0000003f, 0x0000000a,
1392 	0x362c, 0x0000003f, 0x0000000a,
1393 	0x2ae4, 0x00073ffe, 0x000022a2,
1394 	0x240c, 0x000007ff, 0x00000000,
1395 	0x8bf0, 0x00002001, 0x00000001,
1396 	0x8b24, 0xffffffff, 0x00ffffff,
1397 	0x30a04, 0x0000ff0f, 0x00000000,
1398 	0x28a4c, 0x07ffffff, 0x06000000,
1399 	0x3e78, 0x00000001, 0x00000002,
1400 	0xc768, 0x00000008, 0x00000008,
1401 	0xc770, 0x00000f00, 0x00000800,
1402 	0xc774, 0x00000f00, 0x00000800,
1403 	0xc798, 0x00ffffff, 0x00ff7fbf,
1404 	0xc79c, 0x00ffffff, 0x00ff7faf,
1405 	0x8c00, 0x000000ff, 0x00000800,
1406 	0xe40, 0x00001fff, 0x00001fff,
1407 	0x9060, 0x0000007f, 0x00000020,
1408 	0x9508, 0x00010000, 0x00010000,
1409 	0xae00, 0x00100000, 0x000ff07c,
1410 	0xac14, 0x000003ff, 0x0000000f,
1411 	0xac10, 0xffffffff, 0x7564fdec,
1412 	0xac0c, 0xffffffff, 0x3120b9a8,
1413 	0xac08, 0x20000000, 0x0f9c0000
1414 };
1415 
/*
 * Register sequence for Hawaii.  cik_init_golden_registers() passes this
 * table to radeon_program_register_sequence() as the first of the four
 * sequences programmed for CHIP_HAWAII.
 *
 * Values are laid out three per row.
 * NOTE(review): presumably (register offset, mask, value) triples as
 * consumed by radeon_program_register_sequence() - confirm against that
 * helper.
 *
 * Registers 0x30800 and 0x55e4 each appear twice in the sequence; the
 * 0x3c020-0x3c0f8 rows repeat the same group of five values.
 */
1416 static const u32 hawaii_mgcg_cgcg_init[] =
1417 {
1418 	0xc420, 0xffffffff, 0xfffffffd,
1419 	0x30800, 0xffffffff, 0xe0000000,
1420 	0x3c2a0, 0xffffffff, 0x00000100,
1421 	0x3c208, 0xffffffff, 0x00000100,
1422 	0x3c2c0, 0xffffffff, 0x00000100,
1423 	0x3c2c8, 0xffffffff, 0x00000100,
1424 	0x3c2c4, 0xffffffff, 0x00000100,
1425 	0x55e4, 0xffffffff, 0x00200100,
1426 	0x3c280, 0xffffffff, 0x00000100,
1427 	0x3c214, 0xffffffff, 0x06000100,
1428 	0x3c220, 0xffffffff, 0x00000100,
1429 	0x3c218, 0xffffffff, 0x06000100,
1430 	0x3c204, 0xffffffff, 0x00000100,
1431 	0x3c2e0, 0xffffffff, 0x00000100,
1432 	0x3c224, 0xffffffff, 0x00000100,
1433 	0x3c200, 0xffffffff, 0x00000100,
1434 	0x3c230, 0xffffffff, 0x00000100,
1435 	0x3c234, 0xffffffff, 0x00000100,
1436 	0x3c250, 0xffffffff, 0x00000100,
1437 	0x3c254, 0xffffffff, 0x00000100,
1438 	0x3c258, 0xffffffff, 0x00000100,
1439 	0x3c25c, 0xffffffff, 0x00000100,
1440 	0x3c260, 0xffffffff, 0x00000100,
1441 	0x3c27c, 0xffffffff, 0x00000100,
1442 	0x3c278, 0xffffffff, 0x00000100,
1443 	0x3c210, 0xffffffff, 0x06000100,
1444 	0x3c290, 0xffffffff, 0x00000100,
1445 	0x3c274, 0xffffffff, 0x00000100,
1446 	0x3c2b4, 0xffffffff, 0x00000100,
1447 	0x3c2b0, 0xffffffff, 0x00000100,
1448 	0x3c270, 0xffffffff, 0x00000100,
1449 	0x30800, 0xffffffff, 0xe0000000,
1450 	0x3c020, 0xffffffff, 0x00010000,
1451 	0x3c024, 0xffffffff, 0x00030002,
1452 	0x3c028, 0xffffffff, 0x00040007,
1453 	0x3c02c, 0xffffffff, 0x00060005,
1454 	0x3c030, 0xffffffff, 0x00090008,
1455 	0x3c034, 0xffffffff, 0x00010000,
1456 	0x3c038, 0xffffffff, 0x00030002,
1457 	0x3c03c, 0xffffffff, 0x00040007,
1458 	0x3c040, 0xffffffff, 0x00060005,
1459 	0x3c044, 0xffffffff, 0x00090008,
1460 	0x3c048, 0xffffffff, 0x00010000,
1461 	0x3c04c, 0xffffffff, 0x00030002,
1462 	0x3c050, 0xffffffff, 0x00040007,
1463 	0x3c054, 0xffffffff, 0x00060005,
1464 	0x3c058, 0xffffffff, 0x00090008,
1465 	0x3c05c, 0xffffffff, 0x00010000,
1466 	0x3c060, 0xffffffff, 0x00030002,
1467 	0x3c064, 0xffffffff, 0x00040007,
1468 	0x3c068, 0xffffffff, 0x00060005,
1469 	0x3c06c, 0xffffffff, 0x00090008,
1470 	0x3c070, 0xffffffff, 0x00010000,
1471 	0x3c074, 0xffffffff, 0x00030002,
1472 	0x3c078, 0xffffffff, 0x00040007,
1473 	0x3c07c, 0xffffffff, 0x00060005,
1474 	0x3c080, 0xffffffff, 0x00090008,
1475 	0x3c084, 0xffffffff, 0x00010000,
1476 	0x3c088, 0xffffffff, 0x00030002,
1477 	0x3c08c, 0xffffffff, 0x00040007,
1478 	0x3c090, 0xffffffff, 0x00060005,
1479 	0x3c094, 0xffffffff, 0x00090008,
1480 	0x3c098, 0xffffffff, 0x00010000,
1481 	0x3c09c, 0xffffffff, 0x00030002,
1482 	0x3c0a0, 0xffffffff, 0x00040007,
1483 	0x3c0a4, 0xffffffff, 0x00060005,
1484 	0x3c0a8, 0xffffffff, 0x00090008,
1485 	0x3c0ac, 0xffffffff, 0x00010000,
1486 	0x3c0b0, 0xffffffff, 0x00030002,
1487 	0x3c0b4, 0xffffffff, 0x00040007,
1488 	0x3c0b8, 0xffffffff, 0x00060005,
1489 	0x3c0bc, 0xffffffff, 0x00090008,
1490 	0x3c0c0, 0xffffffff, 0x00010000,
1491 	0x3c0c4, 0xffffffff, 0x00030002,
1492 	0x3c0c8, 0xffffffff, 0x00040007,
1493 	0x3c0cc, 0xffffffff, 0x00060005,
1494 	0x3c0d0, 0xffffffff, 0x00090008,
1495 	0x3c0d4, 0xffffffff, 0x00010000,
1496 	0x3c0d8, 0xffffffff, 0x00030002,
1497 	0x3c0dc, 0xffffffff, 0x00040007,
1498 	0x3c0e0, 0xffffffff, 0x00060005,
1499 	0x3c0e4, 0xffffffff, 0x00090008,
1500 	0x3c0e8, 0xffffffff, 0x00010000,
1501 	0x3c0ec, 0xffffffff, 0x00030002,
1502 	0x3c0f0, 0xffffffff, 0x00040007,
1503 	0x3c0f4, 0xffffffff, 0x00060005,
1504 	0x3c0f8, 0xffffffff, 0x00090008,
1505 	0xc318, 0xffffffff, 0x00020200,
1506 	0x3350, 0xffffffff, 0x00000200,
1507 	0x15c0, 0xffffffff, 0x00000400,
1508 	0x55e8, 0xffffffff, 0x00000000,
1509 	0x2f50, 0xffffffff, 0x00000902,
1510 	0x3c000, 0xffffffff, 0x96940200,
1511 	0x8708, 0xffffffff, 0x00900100,
1512 	0xc424, 0xffffffff, 0x0020003f,
1513 	0x38, 0xffffffff, 0x0140001c,
1514 	0x3c, 0x000f0000, 0x000f0000,
1515 	0x220, 0xffffffff, 0xc060000c,
1516 	0x224, 0xc0000fff, 0x00000100,
1517 	0xf90, 0xffffffff, 0x00000100,
1518 	0xf98, 0x00000101, 0x00000000,
1519 	0x20a8, 0xffffffff, 0x00000104,
1520 	0x55e4, 0xff000fff, 0x00000100,
1521 	0x30cc, 0xc0000fff, 0x00000104,
1522 	0xc1e4, 0x00000001, 0x00000001,
1523 	0xd00c, 0xff000ff0, 0x00000100,
1524 	0xd80c, 0xff000ff0, 0x00000100
1525 };
1526 
1527 static const u32 godavari_golden_registers[] =
1528 {
1529 	0x55e4, 0xff607fff, 0xfc000100,
1530 	0x6ed8, 0x00010101, 0x00010000,
1531 	0x9830, 0xffffffff, 0x00000000,
1532 	0x98302, 0xf00fffff, 0x00000400,
1533 	0x6130, 0xffffffff, 0x00010000,
1534 	0x5bb0, 0x000000f0, 0x00000070,
1535 	0x5bc0, 0xf0311fff, 0x80300000,
1536 	0x98f8, 0x73773777, 0x12010001,
1537 	0x98fc, 0xffffffff, 0x00000010,
1538 	0x8030, 0x00001f0f, 0x0000100a,
1539 	0x2f48, 0x73773777, 0x12010001,
1540 	0x2408, 0x000fffff, 0x000c007f,
1541 	0x8a14, 0xf000003f, 0x00000007,
1542 	0x8b24, 0xffffffff, 0x00ff0fff,
1543 	0x30a04, 0x0000ff0f, 0x00000000,
1544 	0x28a4c, 0x07ffffff, 0x06000000,
1545 	0x4d8, 0x00000fff, 0x00000100,
1546 	0xd014, 0x00010000, 0x00810001,
1547 	0xd814, 0x00010000, 0x00810001,
1548 	0x3e78, 0x00000001, 0x00000002,
1549 	0xc768, 0x00000008, 0x00000008,
1550 	0xc770, 0x00000f00, 0x00000800,
1551 	0xc774, 0x00000f00, 0x00000800,
1552 	0xc798, 0x00ffffff, 0x00ff7fbf,
1553 	0xc79c, 0x00ffffff, 0x00ff7faf,
1554 	0x8c00, 0x000000ff, 0x00000001,
1555 	0x214f8, 0x01ff01ff, 0x00000002,
1556 	0x21498, 0x007ff800, 0x00200000,
1557 	0x2015c, 0xffffffff, 0x00000f40,
1558 	0x88c4, 0x001f3ae3, 0x00000082,
1559 	0x88d4, 0x0000001f, 0x00000010,
1560 	0x30934, 0xffffffff, 0x00000000
1561 };
1562 
1563 
1564 static void cik_init_golden_registers(struct radeon_device *rdev)
1565 {
1566 	switch (rdev->family) {
1567 	case CHIP_BONAIRE:
1568 		radeon_program_register_sequence(rdev,
1569 						 bonaire_mgcg_cgcg_init,
1570 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1571 		radeon_program_register_sequence(rdev,
1572 						 bonaire_golden_registers,
1573 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1574 		radeon_program_register_sequence(rdev,
1575 						 bonaire_golden_common_registers,
1576 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1577 		radeon_program_register_sequence(rdev,
1578 						 bonaire_golden_spm_registers,
1579 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1580 		break;
1581 	case CHIP_KABINI:
1582 		radeon_program_register_sequence(rdev,
1583 						 kalindi_mgcg_cgcg_init,
1584 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1585 		radeon_program_register_sequence(rdev,
1586 						 kalindi_golden_registers,
1587 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1588 		radeon_program_register_sequence(rdev,
1589 						 kalindi_golden_common_registers,
1590 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1591 		radeon_program_register_sequence(rdev,
1592 						 kalindi_golden_spm_registers,
1593 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1594 		break;
1595 	case CHIP_MULLINS:
1596 		radeon_program_register_sequence(rdev,
1597 						 kalindi_mgcg_cgcg_init,
1598 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1599 		radeon_program_register_sequence(rdev,
1600 						 godavari_golden_registers,
1601 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1602 		radeon_program_register_sequence(rdev,
1603 						 kalindi_golden_common_registers,
1604 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1605 		radeon_program_register_sequence(rdev,
1606 						 kalindi_golden_spm_registers,
1607 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1608 		break;
1609 	case CHIP_KAVERI:
1610 		radeon_program_register_sequence(rdev,
1611 						 spectre_mgcg_cgcg_init,
1612 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1613 		radeon_program_register_sequence(rdev,
1614 						 spectre_golden_registers,
1615 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1616 		radeon_program_register_sequence(rdev,
1617 						 spectre_golden_common_registers,
1618 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1619 		radeon_program_register_sequence(rdev,
1620 						 spectre_golden_spm_registers,
1621 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1622 		break;
1623 	case CHIP_HAWAII:
1624 		radeon_program_register_sequence(rdev,
1625 						 hawaii_mgcg_cgcg_init,
1626 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1627 		radeon_program_register_sequence(rdev,
1628 						 hawaii_golden_registers,
1629 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1630 		radeon_program_register_sequence(rdev,
1631 						 hawaii_golden_common_registers,
1632 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1633 		radeon_program_register_sequence(rdev,
1634 						 hawaii_golden_spm_registers,
1635 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1636 		break;
1637 	default:
1638 		break;
1639 	}
1640 }
1641 
1642 /**
1643  * cik_get_xclk - get the xclk
1644  *
1645  * @rdev: radeon_device pointer
1646  *
1647  * Returns the reference clock used by the gfx engine
1648  * (CIK).
1649  */
1650 u32 cik_get_xclk(struct radeon_device *rdev)
1651 {
1652         u32 reference_clock = rdev->clock.spll.reference_freq;
1653 
1654 	if (rdev->flags & RADEON_IS_IGP) {
1655 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1656 			return reference_clock / 2;
1657 	} else {
1658 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1659 			return reference_clock / 4;
1660 	}
1661 	return reference_clock;
1662 }
1663 
1664 /**
1665  * cik_mm_rdoorbell - read a doorbell dword
1666  *
1667  * @rdev: radeon_device pointer
1668  * @index: doorbell index
1669  *
1670  * Returns the value in the doorbell aperture at the
1671  * requested doorbell index (CIK).
1672  */
1673 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1674 {
1675 	if (index < rdev->doorbell.num_doorbells) {
1676 		return readl(rdev->doorbell.ptr + index);
1677 	} else {
1678 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1679 		return 0;
1680 	}
1681 }
1682 
1683 /**
1684  * cik_mm_wdoorbell - write a doorbell dword
1685  *
1686  * @rdev: radeon_device pointer
1687  * @index: doorbell index
1688  * @v: value to write
1689  *
1690  * Writes @v to the doorbell aperture at the
1691  * requested doorbell index (CIK).
1692  */
1693 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1694 {
1695 	if (index < rdev->doorbell.num_doorbells) {
1696 		writel(v, rdev->doorbell.ptr + index);
1697 	} else {
1698 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1699 	}
1700 }
1701 
/*
 * Built-in MC IO register table for Bonaire, BONAIRE_IO_MC_REGS_SIZE rows
 * of two values each.  ci_mc_load_microcode() uses it for CHIP_BONAIRE
 * when rdev->new_fw is not set: for every row the first value is written
 * to MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 */
1702 #define BONAIRE_IO_MC_REGS_SIZE 36
1703 
1704 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1705 {
1706 	{0x00000070, 0x04400000},
1707 	{0x00000071, 0x80c01803},
1708 	{0x00000072, 0x00004004},
1709 	{0x00000073, 0x00000100},
1710 	{0x00000074, 0x00ff0000},
1711 	{0x00000075, 0x34000000},
1712 	{0x00000076, 0x08000014},
1713 	{0x00000077, 0x00cc08ec},
1714 	{0x00000078, 0x00000400},
1715 	{0x00000079, 0x00000000},
1716 	{0x0000007a, 0x04090000},
1717 	{0x0000007c, 0x00000000},
1718 	{0x0000007e, 0x4408a8e8},
1719 	{0x0000007f, 0x00000304},
1720 	{0x00000080, 0x00000000},
1721 	{0x00000082, 0x00000001},
1722 	{0x00000083, 0x00000002},
1723 	{0x00000084, 0xf3e4f400},
1724 	{0x00000085, 0x052024e3},
1725 	{0x00000087, 0x00000000},
1726 	{0x00000088, 0x01000000},
1727 	{0x0000008a, 0x1c0a0000},
1728 	{0x0000008b, 0xff010000},
1729 	{0x0000008d, 0xffffefff},
1730 	{0x0000008e, 0xfff3efff},
1731 	{0x0000008f, 0xfff3efbf},
1732 	{0x00000092, 0xf7ffffff},
1733 	{0x00000093, 0xffffff7f},
1734 	{0x00000095, 0x00101101},
1735 	{0x00000096, 0x00000fff},
1736 	{0x00000097, 0x00116fff},
1737 	{0x00000098, 0x60010000},
1738 	{0x00000099, 0x10010000},
1739 	{0x0000009a, 0x00006000},
1740 	{0x0000009b, 0x00001000},
1741 	{0x0000009f, 0x00b48000}
1742 };
1743 
/*
 * Built-in MC IO register table for Hawaii, HAWAII_IO_MC_REGS_SIZE rows
 * of two values each.  ci_mc_load_microcode() uses it for CHIP_HAWAII
 * when rdev->new_fw is not set: for every row the first value is written
 * to MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 */
1744 #define HAWAII_IO_MC_REGS_SIZE 22
1745 
1746 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1747 {
1748 	{0x0000007d, 0x40000000},
1749 	{0x0000007e, 0x40180304},
1750 	{0x0000007f, 0x0000ff00},
1751 	{0x00000081, 0x00000000},
1752 	{0x00000083, 0x00000800},
1753 	{0x00000086, 0x00000000},
1754 	{0x00000087, 0x00000100},
1755 	{0x00000088, 0x00020100},
1756 	{0x00000089, 0x00000000},
1757 	{0x0000008b, 0x00040000},
1758 	{0x0000008c, 0x00000100},
1759 	{0x0000008e, 0xff010000},
1760 	{0x00000090, 0xffffefff},
1761 	{0x00000091, 0xfff3efff},
1762 	{0x00000092, 0xfff3efbf},
1763 	{0x00000093, 0xf7ffffff},
1764 	{0x00000094, 0xffffff7f},
1765 	{0x00000095, 0x00000fff},
1766 	{0x00000096, 0x00116fff},
1767 	{0x00000097, 0x60010000},
1768 	{0x00000098, 0x10010000},
1769 	{0x0000009f, 0x00c79000}
1770 };
1771 
1772 
1773 /**
1774  * cik_srbm_select - select specific register instances
1775  *
1776  * @rdev: radeon_device pointer
1777  * @me: selected ME (micro engine)
1778  * @pipe: pipe
1779  * @queue: queue
1780  * @vmid: VMID
1781  *
1782  * Switches the currently active registers instances.  Some
1783  * registers are instanced per VMID, others are instanced per
1784  * me/pipe/queue combination.
1785  */
1786 static void cik_srbm_select(struct radeon_device *rdev,
1787 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1788 {
1789 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1790 			     MEID(me & 0x3) |
1791 			     VMID(vmid & 0xf) |
1792 			     QUEUEID(queue & 0x7));
1793 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1794 }
1795 
1796 /* ucode loading */
1797 /**
1798  * ci_mc_load_microcode - load MC ucode into the hw
1799  *
1800  * @rdev: radeon_device pointer
1801  *
1802  * Load the GDDR MC ucode into the hw (CIK).
1803  * Returns 0 on success, error on failure.
1804  */
1805 int ci_mc_load_microcode(struct radeon_device *rdev)
1806 {
1807 	const __be32 *fw_data = NULL;
1808 	const __le32 *new_fw_data = NULL;
1809 	u32 running, blackout = 0;
1810 	u32 *io_mc_regs = NULL;
1811 	const __le32 *new_io_mc_regs = NULL;
1812 	int i, regs_size, ucode_size;
1813 
1814 	if (!rdev->mc_fw)
1815 		return -EINVAL;
1816 
1817 	if (rdev->new_fw) {
1818 		const struct mc_firmware_header_v1_0 *hdr =
1819 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1820 
1821 		radeon_ucode_print_mc_hdr(&hdr->header);
1822 
1823 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1824 		new_io_mc_regs = (const __le32 *)
1825 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1826 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1827 		new_fw_data = (const __le32 *)
1828 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1829 	} else {
1830 		ucode_size = rdev->mc_fw->size / 4;
1831 
1832 		switch (rdev->family) {
1833 		case CHIP_BONAIRE:
1834 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1835 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1836 			break;
1837 		case CHIP_HAWAII:
1838 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1839 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1840 			break;
1841 		default:
1842 			return -EINVAL;
1843 		}
1844 		fw_data = (const __be32 *)rdev->mc_fw->data;
1845 	}
1846 
1847 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1848 
1849 	if (running == 0) {
1850 		if (running) {
1851 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1852 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1853 		}
1854 
1855 		/* reset the engine and set to writable */
1856 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1857 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1858 
1859 		/* load mc io regs */
1860 		for (i = 0; i < regs_size; i++) {
1861 			if (rdev->new_fw) {
1862 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1863 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1864 			} else {
1865 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1866 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1867 			}
1868 		}
1869 		/* load the MC ucode */
1870 		for (i = 0; i < ucode_size; i++) {
1871 			if (rdev->new_fw)
1872 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1873 			else
1874 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1875 		}
1876 
1877 		/* put the engine back into the active state */
1878 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1879 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1880 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1881 
1882 		/* wait for training to complete */
1883 		for (i = 0; i < rdev->usec_timeout; i++) {
1884 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1885 				break;
1886 			udelay(1);
1887 		}
1888 		for (i = 0; i < rdev->usec_timeout; i++) {
1889 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1890 				break;
1891 			udelay(1);
1892 		}
1893 
1894 		if (running)
1895 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1896 	}
1897 
1898 	return 0;
1899 }
1900 
1901 /**
1902  * cik_init_microcode - load ucode images from disk
1903  *
1904  * @rdev: radeon_device pointer
1905  *
1906  * Use the firmware interface to load the ucode images into
1907  * the driver (not loaded into hw).
1908  * Returns 0 on success, error on failure.
1909  */
1910 static int cik_init_microcode(struct radeon_device *rdev)
1911 {
1912 	const char *chip_name;
1913 	const char *new_chip_name;
1914 	size_t pfp_req_size, me_req_size, ce_req_size,
1915 		mec_req_size, rlc_req_size, mc_req_size = 0,
1916 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1917 	char fw_name[30];
1918 	int new_fw = 0;
1919 	int err;
1920 	int num_fw;
1921 
1922 	DRM_DEBUG("\n");
1923 
1924 	switch (rdev->family) {
1925 	case CHIP_BONAIRE:
1926 		chip_name = "BONAIRE";
1927 		new_chip_name = "bonaire";
1928 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1929 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1930 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1931 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1932 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1933 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1934 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1935 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1936 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1937 		num_fw = 8;
1938 		break;
1939 	case CHIP_HAWAII:
1940 		chip_name = "HAWAII";
1941 		new_chip_name = "hawaii";
1942 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1943 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1944 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1945 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1946 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1947 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1948 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1949 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1950 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1951 		num_fw = 8;
1952 		break;
1953 	case CHIP_KAVERI:
1954 		chip_name = "KAVERI";
1955 		new_chip_name = "kaveri";
1956 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1957 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1958 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1959 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1960 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1961 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1962 		num_fw = 7;
1963 		break;
1964 	case CHIP_KABINI:
1965 		chip_name = "KABINI";
1966 		new_chip_name = "kabini";
1967 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1968 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1969 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1970 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1971 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1972 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1973 		num_fw = 6;
1974 		break;
1975 	case CHIP_MULLINS:
1976 		chip_name = "MULLINS";
1977 		new_chip_name = "mullins";
1978 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1979 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1980 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1981 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1982 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1983 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1984 		num_fw = 6;
1985 		break;
1986 	default: BUG();
1987 	}
1988 
1989 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1990 
1991 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1992 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1993 	if (err) {
1994 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1995 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1996 		if (err)
1997 			goto out;
1998 		if (rdev->pfp_fw->size != pfp_req_size) {
1999 			printk(KERN_ERR
2000 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2001 			       rdev->pfp_fw->size, fw_name);
2002 			err = -EINVAL;
2003 			goto out;
2004 		}
2005 	} else {
2006 		err = radeon_ucode_validate(rdev->pfp_fw);
2007 		if (err) {
2008 			printk(KERN_ERR
2009 			       "cik_fw: validation failed for firmware \"%s\"\n",
2010 			       fw_name);
2011 			goto out;
2012 		} else {
2013 			new_fw++;
2014 		}
2015 	}
2016 
2017 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2018 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2019 	if (err) {
2020 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2021 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2022 		if (err)
2023 			goto out;
2024 		if (rdev->me_fw->size != me_req_size) {
2025 			printk(KERN_ERR
2026 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2027 			       rdev->me_fw->size, fw_name);
2028 			err = -EINVAL;
2029 		}
2030 	} else {
2031 		err = radeon_ucode_validate(rdev->me_fw);
2032 		if (err) {
2033 			printk(KERN_ERR
2034 			       "cik_fw: validation failed for firmware \"%s\"\n",
2035 			       fw_name);
2036 			goto out;
2037 		} else {
2038 			new_fw++;
2039 		}
2040 	}
2041 
2042 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2043 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2044 	if (err) {
2045 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2046 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2047 		if (err)
2048 			goto out;
2049 		if (rdev->ce_fw->size != ce_req_size) {
2050 			printk(KERN_ERR
2051 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2052 			       rdev->ce_fw->size, fw_name);
2053 			err = -EINVAL;
2054 		}
2055 	} else {
2056 		err = radeon_ucode_validate(rdev->ce_fw);
2057 		if (err) {
2058 			printk(KERN_ERR
2059 			       "cik_fw: validation failed for firmware \"%s\"\n",
2060 			       fw_name);
2061 			goto out;
2062 		} else {
2063 			new_fw++;
2064 		}
2065 	}
2066 
2067 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2068 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2069 	if (err) {
2070 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2071 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2072 		if (err)
2073 			goto out;
2074 		if (rdev->mec_fw->size != mec_req_size) {
2075 			printk(KERN_ERR
2076 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2077 			       rdev->mec_fw->size, fw_name);
2078 			err = -EINVAL;
2079 		}
2080 	} else {
2081 		err = radeon_ucode_validate(rdev->mec_fw);
2082 		if (err) {
2083 			printk(KERN_ERR
2084 			       "cik_fw: validation failed for firmware \"%s\"\n",
2085 			       fw_name);
2086 			goto out;
2087 		} else {
2088 			new_fw++;
2089 		}
2090 	}
2091 
2092 	if (rdev->family == CHIP_KAVERI) {
2093 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2094 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2095 		if (err) {
2096 			goto out;
2097 		} else {
2098 			err = radeon_ucode_validate(rdev->mec2_fw);
2099 			if (err) {
2100 				goto out;
2101 			} else {
2102 				new_fw++;
2103 			}
2104 		}
2105 	}
2106 
2107 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2108 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2109 	if (err) {
2110 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2111 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2112 		if (err)
2113 			goto out;
2114 		if (rdev->rlc_fw->size != rlc_req_size) {
2115 			printk(KERN_ERR
2116 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2117 			       rdev->rlc_fw->size, fw_name);
2118 			err = -EINVAL;
2119 		}
2120 	} else {
2121 		err = radeon_ucode_validate(rdev->rlc_fw);
2122 		if (err) {
2123 			printk(KERN_ERR
2124 			       "cik_fw: validation failed for firmware \"%s\"\n",
2125 			       fw_name);
2126 			goto out;
2127 		} else {
2128 			new_fw++;
2129 		}
2130 	}
2131 
2132 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2133 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2134 	if (err) {
2135 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2136 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2137 		if (err)
2138 			goto out;
2139 		if (rdev->sdma_fw->size != sdma_req_size) {
2140 			printk(KERN_ERR
2141 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2142 			       rdev->sdma_fw->size, fw_name);
2143 			err = -EINVAL;
2144 		}
2145 	} else {
2146 		err = radeon_ucode_validate(rdev->sdma_fw);
2147 		if (err) {
2148 			printk(KERN_ERR
2149 			       "cik_fw: validation failed for firmware \"%s\"\n",
2150 			       fw_name);
2151 			goto out;
2152 		} else {
2153 			new_fw++;
2154 		}
2155 	}
2156 
2157 	/* No SMC, MC ucode on APUs */
2158 	if (!(rdev->flags & RADEON_IS_IGP)) {
2159 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2160 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2161 		if (err) {
2162 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2163 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2164 			if (err) {
2165 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2166 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2167 				if (err)
2168 					goto out;
2169 			}
2170 			if ((rdev->mc_fw->size != mc_req_size) &&
2171 			    (rdev->mc_fw->size != mc2_req_size)){
2172 				printk(KERN_ERR
2173 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2174 				       rdev->mc_fw->size, fw_name);
2175 				err = -EINVAL;
2176 			}
2177 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2178 		} else {
2179 			err = radeon_ucode_validate(rdev->mc_fw);
2180 			if (err) {
2181 				printk(KERN_ERR
2182 				       "cik_fw: validation failed for firmware \"%s\"\n",
2183 				       fw_name);
2184 				goto out;
2185 			} else {
2186 				new_fw++;
2187 			}
2188 		}
2189 
2190 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2191 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2192 		if (err) {
2193 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2194 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2195 			if (err) {
2196 				printk(KERN_ERR
2197 				       "smc: error loading firmware \"%s\"\n",
2198 				       fw_name);
2199 				release_firmware(rdev->smc_fw);
2200 				rdev->smc_fw = NULL;
2201 				err = 0;
2202 			} else if (rdev->smc_fw->size != smc_req_size) {
2203 				printk(KERN_ERR
2204 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2205 				       rdev->smc_fw->size, fw_name);
2206 				err = -EINVAL;
2207 			}
2208 		} else {
2209 			err = radeon_ucode_validate(rdev->smc_fw);
2210 			if (err) {
2211 				printk(KERN_ERR
2212 				       "cik_fw: validation failed for firmware \"%s\"\n",
2213 				       fw_name);
2214 				goto out;
2215 			} else {
2216 				new_fw++;
2217 			}
2218 		}
2219 	}
2220 
2221 	if (new_fw == 0) {
2222 		rdev->new_fw = false;
2223 	} else if (new_fw < num_fw) {
2224 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2225 		err = -EINVAL;
2226 	} else {
2227 		rdev->new_fw = true;
2228 	}
2229 
2230 out:
2231 	if (err) {
2232 		if (err != -EINVAL)
2233 			printk(KERN_ERR
2234 			       "cik_cp: Failed to load firmware \"%s\"\n",
2235 			       fw_name);
2236 		release_firmware(rdev->pfp_fw);
2237 		rdev->pfp_fw = NULL;
2238 		release_firmware(rdev->me_fw);
2239 		rdev->me_fw = NULL;
2240 		release_firmware(rdev->ce_fw);
2241 		rdev->ce_fw = NULL;
2242 		release_firmware(rdev->mec_fw);
2243 		rdev->mec_fw = NULL;
2244 		release_firmware(rdev->mec2_fw);
2245 		rdev->mec2_fw = NULL;
2246 		release_firmware(rdev->rlc_fw);
2247 		rdev->rlc_fw = NULL;
2248 		release_firmware(rdev->sdma_fw);
2249 		rdev->sdma_fw = NULL;
2250 		release_firmware(rdev->mc_fw);
2251 		rdev->mc_fw = NULL;
2252 		release_firmware(rdev->smc_fw);
2253 		rdev->smc_fw = NULL;
2254 	}
2255 	return err;
2256 }
2257 
2258 /*
2259  * Core functions
2260  */
2261 /**
2262  * cik_tiling_mode_table_init - init the hw tiling table
2263  *
2264  * @rdev: radeon_device pointer
2265  *
2266  * Starting with SI, the tiling setup is done globally in a
2267  * set of 32 tiling modes.  Rather than selecting each set of
2268  * parameters per surface as on older asics, we just select
2269  * which index in the tiling table we want to use, and the
2270  * surface uses those parameters (CIK).
2271  */
2272 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2273 {
2274 	const u32 num_tile_mode_states = 32;
2275 	const u32 num_secondary_tile_mode_states = 16;
2276 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2277 	u32 num_pipe_configs;
2278 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2279 		rdev->config.cik.max_shader_engines;
2280 
2281 	switch (rdev->config.cik.mem_row_size_in_kb) {
2282 	case 1:
2283 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2284 		break;
2285 	case 2:
2286 	default:
2287 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2288 		break;
2289 	case 4:
2290 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2291 		break;
2292 	}
2293 
2294 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2295 	if (num_pipe_configs > 8)
2296 		num_pipe_configs = 16;
2297 
2298 	if (num_pipe_configs == 16) {
2299 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2300 			switch (reg_offset) {
2301 			case 0:
2302 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2304 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2306 				break;
2307 			case 1:
2308 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2310 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2312 				break;
2313 			case 2:
2314 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2316 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2318 				break;
2319 			case 3:
2320 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2322 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2324 				break;
2325 			case 4:
2326 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2328 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 						 TILE_SPLIT(split_equal_to_row_size));
2330 				break;
2331 			case 5:
2332 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335 				break;
2336 			case 6:
2337 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2338 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2339 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2341 				break;
2342 			case 7:
2343 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2344 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2345 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346 						 TILE_SPLIT(split_equal_to_row_size));
2347 				break;
2348 			case 8:
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2350 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2351 				break;
2352 			case 9:
2353 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2354 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2356 				break;
2357 			case 10:
2358 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2359 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2362 				break;
2363 			case 11:
2364 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2365 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2366 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2367 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368 				break;
2369 			case 12:
2370 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2371 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374 				break;
2375 			case 13:
2376 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2377 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2379 				break;
2380 			case 14:
2381 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2383 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385 				break;
2386 			case 16:
2387 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2388 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2389 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2390 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2391 				break;
2392 			case 17:
2393 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2395 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397 				break;
2398 			case 27:
2399 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2400 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2402 				break;
2403 			case 28:
2404 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2406 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2408 				break;
2409 			case 29:
2410 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2412 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414 				break;
2415 			case 30:
2416 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2418 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 				break;
2421 			default:
2422 				gb_tile_moden = 0;
2423 				break;
2424 			}
2425 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2426 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2427 		}
2428 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2429 			switch (reg_offset) {
2430 			case 0:
2431 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2433 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2434 						 NUM_BANKS(ADDR_SURF_16_BANK));
2435 				break;
2436 			case 1:
2437 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2439 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440 						 NUM_BANKS(ADDR_SURF_16_BANK));
2441 				break;
2442 			case 2:
2443 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2445 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2446 						 NUM_BANKS(ADDR_SURF_16_BANK));
2447 				break;
2448 			case 3:
2449 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452 						 NUM_BANKS(ADDR_SURF_16_BANK));
2453 				break;
2454 			case 4:
2455 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2457 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2458 						 NUM_BANKS(ADDR_SURF_8_BANK));
2459 				break;
2460 			case 5:
2461 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 						 NUM_BANKS(ADDR_SURF_4_BANK));
2465 				break;
2466 			case 6:
2467 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470 						 NUM_BANKS(ADDR_SURF_2_BANK));
2471 				break;
2472 			case 8:
2473 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476 						 NUM_BANKS(ADDR_SURF_16_BANK));
2477 				break;
2478 			case 9:
2479 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2481 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2482 						 NUM_BANKS(ADDR_SURF_16_BANK));
2483 				break;
2484 			case 10:
2485 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 						 NUM_BANKS(ADDR_SURF_16_BANK));
2489 				break;
2490 			case 11:
2491 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2493 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2494 						 NUM_BANKS(ADDR_SURF_8_BANK));
2495 				break;
2496 			case 12:
2497 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 						 NUM_BANKS(ADDR_SURF_4_BANK));
2501 				break;
2502 			case 13:
2503 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2506 						 NUM_BANKS(ADDR_SURF_2_BANK));
2507 				break;
2508 			case 14:
2509 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2512 						 NUM_BANKS(ADDR_SURF_2_BANK));
2513 				break;
2514 			default:
2515 				gb_tile_moden = 0;
2516 				break;
2517 			}
2518 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2519 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2520 		}
2521 	} else if (num_pipe_configs == 8) {
2522 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2523 			switch (reg_offset) {
2524 			case 0:
2525 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2529 				break;
2530 			case 1:
2531 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2533 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2535 				break;
2536 			case 2:
2537 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2539 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2541 				break;
2542 			case 3:
2543 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2545 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2547 				break;
2548 			case 4:
2549 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 						 TILE_SPLIT(split_equal_to_row_size));
2553 				break;
2554 			case 5:
2555 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2556 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2557 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2558 				break;
2559 			case 6:
2560 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2561 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2562 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2564 				break;
2565 			case 7:
2566 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2567 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2568 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 						 TILE_SPLIT(split_equal_to_row_size));
2570 				break;
2571 			case 8:
2572 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2573 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2574 				break;
2575 			case 9:
2576 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2579 				break;
2580 			case 10:
2581 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585 				break;
2586 			case 11:
2587 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2588 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2589 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2590 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591 				break;
2592 			case 12:
2593 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2594 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2595 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2597 				break;
2598 			case 13:
2599 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2600 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2602 				break;
2603 			case 14:
2604 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2606 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2608 				break;
2609 			case 16:
2610 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2611 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2612 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2613 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2614 				break;
2615 			case 17:
2616 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2617 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2618 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620 				break;
2621 			case 27:
2622 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2623 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2625 				break;
2626 			case 28:
2627 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2629 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2630 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631 				break;
2632 			case 29:
2633 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2635 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637 				break;
2638 			case 30:
2639 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2640 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2641 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2642 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643 				break;
2644 			default:
2645 				gb_tile_moden = 0;
2646 				break;
2647 			}
2648 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2649 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2650 		}
2651 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2652 			switch (reg_offset) {
2653 			case 0:
2654 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2656 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2657 						 NUM_BANKS(ADDR_SURF_16_BANK));
2658 				break;
2659 			case 1:
2660 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2662 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2663 						 NUM_BANKS(ADDR_SURF_16_BANK));
2664 				break;
2665 			case 2:
2666 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2669 						 NUM_BANKS(ADDR_SURF_16_BANK));
2670 				break;
2671 			case 3:
2672 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2674 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2675 						 NUM_BANKS(ADDR_SURF_16_BANK));
2676 				break;
2677 			case 4:
2678 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2681 						 NUM_BANKS(ADDR_SURF_8_BANK));
2682 				break;
2683 			case 5:
2684 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2687 						 NUM_BANKS(ADDR_SURF_4_BANK));
2688 				break;
2689 			case 6:
2690 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2693 						 NUM_BANKS(ADDR_SURF_2_BANK));
2694 				break;
2695 			case 8:
2696 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699 						 NUM_BANKS(ADDR_SURF_16_BANK));
2700 				break;
2701 			case 9:
2702 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2705 						 NUM_BANKS(ADDR_SURF_16_BANK));
2706 				break;
2707 			case 10:
2708 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2710 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711 						 NUM_BANKS(ADDR_SURF_16_BANK));
2712 				break;
2713 			case 11:
2714 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717 						 NUM_BANKS(ADDR_SURF_16_BANK));
2718 				break;
2719 			case 12:
2720 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2723 						 NUM_BANKS(ADDR_SURF_8_BANK));
2724 				break;
2725 			case 13:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729 						 NUM_BANKS(ADDR_SURF_4_BANK));
2730 				break;
2731 			case 14:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2735 						 NUM_BANKS(ADDR_SURF_2_BANK));
2736 				break;
2737 			default:
2738 				gb_tile_moden = 0;
2739 				break;
2740 			}
2741 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2742 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2743 		}
2744 	} else if (num_pipe_configs == 4) {
2745 		if (num_rbs == 4) {
2746 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2747 				switch (reg_offset) {
2748 				case 0:
2749 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2753 					break;
2754 				case 1:
2755 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2756 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2757 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2759 					break;
2760 				case 2:
2761 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2763 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2765 					break;
2766 				case 3:
2767 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2768 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2769 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2771 					break;
2772 				case 4:
2773 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776 							 TILE_SPLIT(split_equal_to_row_size));
2777 					break;
2778 				case 5:
2779 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2780 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2782 					break;
2783 				case 6:
2784 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2785 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2788 					break;
2789 				case 7:
2790 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2791 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793 							 TILE_SPLIT(split_equal_to_row_size));
2794 					break;
2795 				case 8:
2796 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2797 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2798 					break;
2799 				case 9:
2800 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2803 					break;
2804 				case 10:
2805 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2806 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2807 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809 					break;
2810 				case 11:
2811 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2812 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 					break;
2816 				case 12:
2817 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2818 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2819 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2821 					break;
2822 				case 13:
2823 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2824 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2826 					break;
2827 				case 14:
2828 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2829 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2830 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2832 					break;
2833 				case 16:
2834 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2835 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2836 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2838 					break;
2839 				case 17:
2840 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2841 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2842 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844 					break;
2845 				case 27:
2846 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2847 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2849 					break;
2850 				case 28:
2851 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2852 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2853 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2854 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855 					break;
2856 				case 29:
2857 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2859 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2860 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2861 					break;
2862 				case 30:
2863 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2864 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2865 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867 					break;
2868 				default:
2869 					gb_tile_moden = 0;
2870 					break;
2871 				}
2872 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2873 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2874 			}
2875 		} else if (num_rbs < 4) {
2876 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2877 				switch (reg_offset) {
2878 				case 0:
2879 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2881 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2882 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2883 					break;
2884 				case 1:
2885 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2888 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889 					break;
2890 				case 2:
2891 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2894 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895 					break;
2896 				case 3:
2897 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2900 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2901 					break;
2902 				case 4:
2903 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2905 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2906 							 TILE_SPLIT(split_equal_to_row_size));
2907 					break;
2908 				case 5:
2909 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2910 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2911 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912 					break;
2913 				case 6:
2914 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2915 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2916 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2917 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2918 					break;
2919 				case 7:
2920 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2921 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2922 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2923 							 TILE_SPLIT(split_equal_to_row_size));
2924 					break;
2925 				case 8:
2926 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2927 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2928 					break;
2929 				case 9:
2930 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2931 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2933 					break;
2934 				case 10:
2935 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2936 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2938 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 					break;
2940 				case 11:
2941 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2942 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2944 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 					break;
2946 				case 12:
2947 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2950 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 					break;
2952 				case 13:
2953 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2955 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2956 					break;
2957 				case 14:
2958 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2959 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2961 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962 					break;
2963 				case 16:
2964 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2965 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2967 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968 					break;
2969 				case 17:
2970 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2973 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974 					break;
2975 				case 27:
2976 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2977 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2979 					break;
2980 				case 28:
2981 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2982 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2983 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2984 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2985 					break;
2986 				case 29:
2987 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2988 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2989 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2990 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2991 					break;
2992 				case 30:
2993 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2994 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2995 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2996 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997 					break;
2998 				default:
2999 					gb_tile_moden = 0;
3000 					break;
3001 				}
3002 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3003 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3004 			}
3005 		}
3006 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3007 			switch (reg_offset) {
3008 			case 0:
3009 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012 						 NUM_BANKS(ADDR_SURF_16_BANK));
3013 				break;
3014 			case 1:
3015 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018 						 NUM_BANKS(ADDR_SURF_16_BANK));
3019 				break;
3020 			case 2:
3021 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3023 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3024 						 NUM_BANKS(ADDR_SURF_16_BANK));
3025 				break;
3026 			case 3:
3027 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3030 						 NUM_BANKS(ADDR_SURF_16_BANK));
3031 				break;
3032 			case 4:
3033 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3035 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3036 						 NUM_BANKS(ADDR_SURF_16_BANK));
3037 				break;
3038 			case 5:
3039 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3041 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042 						 NUM_BANKS(ADDR_SURF_8_BANK));
3043 				break;
3044 			case 6:
3045 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3047 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3048 						 NUM_BANKS(ADDR_SURF_4_BANK));
3049 				break;
3050 			case 8:
3051 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3052 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3053 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3054 						 NUM_BANKS(ADDR_SURF_16_BANK));
3055 				break;
3056 			case 9:
3057 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3058 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3059 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3060 						 NUM_BANKS(ADDR_SURF_16_BANK));
3061 				break;
3062 			case 10:
3063 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3065 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066 						 NUM_BANKS(ADDR_SURF_16_BANK));
3067 				break;
3068 			case 11:
3069 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3071 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3072 						 NUM_BANKS(ADDR_SURF_16_BANK));
3073 				break;
3074 			case 12:
3075 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3077 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3078 						 NUM_BANKS(ADDR_SURF_16_BANK));
3079 				break;
3080 			case 13:
3081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3084 						 NUM_BANKS(ADDR_SURF_8_BANK));
3085 				break;
3086 			case 14:
3087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3090 						 NUM_BANKS(ADDR_SURF_4_BANK));
3091 				break;
3092 			default:
3093 				gb_tile_moden = 0;
3094 				break;
3095 			}
3096 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3097 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3098 		}
3099 	} else if (num_pipe_configs == 2) {
3100 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3101 			switch (reg_offset) {
3102 			case 0:
3103 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3105 						 PIPE_CONFIG(ADDR_SURF_P2) |
3106 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3107 				break;
3108 			case 1:
3109 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3111 						 PIPE_CONFIG(ADDR_SURF_P2) |
3112 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3113 				break;
3114 			case 2:
3115 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3117 						 PIPE_CONFIG(ADDR_SURF_P2) |
3118 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3119 				break;
3120 			case 3:
3121 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3123 						 PIPE_CONFIG(ADDR_SURF_P2) |
3124 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3125 				break;
3126 			case 4:
3127 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3128 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3129 						 PIPE_CONFIG(ADDR_SURF_P2) |
3130 						 TILE_SPLIT(split_equal_to_row_size));
3131 				break;
3132 			case 5:
3133 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3134 						 PIPE_CONFIG(ADDR_SURF_P2) |
3135 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3136 				break;
3137 			case 6:
3138 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3139 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3140 						 PIPE_CONFIG(ADDR_SURF_P2) |
3141 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3142 				break;
3143 			case 7:
3144 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3145 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3146 						 PIPE_CONFIG(ADDR_SURF_P2) |
3147 						 TILE_SPLIT(split_equal_to_row_size));
3148 				break;
3149 			case 8:
3150 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3151 						PIPE_CONFIG(ADDR_SURF_P2);
3152 				break;
3153 			case 9:
3154 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3155 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3156 						 PIPE_CONFIG(ADDR_SURF_P2));
3157 				break;
3158 			case 10:
3159 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3160 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3161 						 PIPE_CONFIG(ADDR_SURF_P2) |
3162 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163 				break;
3164 			case 11:
3165 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3166 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3167 						 PIPE_CONFIG(ADDR_SURF_P2) |
3168 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3169 				break;
3170 			case 12:
3171 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3172 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3173 						 PIPE_CONFIG(ADDR_SURF_P2) |
3174 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3175 				break;
3176 			case 13:
3177 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3178 						 PIPE_CONFIG(ADDR_SURF_P2) |
3179 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3180 				break;
3181 			case 14:
3182 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3183 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3184 						 PIPE_CONFIG(ADDR_SURF_P2) |
3185 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3186 				break;
3187 			case 16:
3188 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3189 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3190 						 PIPE_CONFIG(ADDR_SURF_P2) |
3191 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192 				break;
3193 			case 17:
3194 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3195 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3196 						 PIPE_CONFIG(ADDR_SURF_P2) |
3197 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3198 				break;
3199 			case 27:
3200 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3201 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3202 						 PIPE_CONFIG(ADDR_SURF_P2));
3203 				break;
3204 			case 28:
3205 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3206 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3207 						 PIPE_CONFIG(ADDR_SURF_P2) |
3208 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3209 				break;
3210 			case 29:
3211 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3212 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3213 						 PIPE_CONFIG(ADDR_SURF_P2) |
3214 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3215 				break;
3216 			case 30:
3217 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3218 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3219 						 PIPE_CONFIG(ADDR_SURF_P2) |
3220 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3221 				break;
3222 			default:
3223 				gb_tile_moden = 0;
3224 				break;
3225 			}
3226 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3227 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3228 		}
3229 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3230 			switch (reg_offset) {
3231 			case 0:
3232 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3234 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235 						 NUM_BANKS(ADDR_SURF_16_BANK));
3236 				break;
3237 			case 1:
3238 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3239 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3240 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241 						 NUM_BANKS(ADDR_SURF_16_BANK));
3242 				break;
3243 			case 2:
3244 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3246 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247 						 NUM_BANKS(ADDR_SURF_16_BANK));
3248 				break;
3249 			case 3:
3250 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3252 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253 						 NUM_BANKS(ADDR_SURF_16_BANK));
3254 				break;
3255 			case 4:
3256 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3258 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259 						 NUM_BANKS(ADDR_SURF_16_BANK));
3260 				break;
3261 			case 5:
3262 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3264 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265 						 NUM_BANKS(ADDR_SURF_16_BANK));
3266 				break;
3267 			case 6:
3268 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3270 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3271 						 NUM_BANKS(ADDR_SURF_8_BANK));
3272 				break;
3273 			case 8:
3274 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3275 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3276 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3277 						 NUM_BANKS(ADDR_SURF_16_BANK));
3278 				break;
3279 			case 9:
3280 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3281 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3282 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3283 						 NUM_BANKS(ADDR_SURF_16_BANK));
3284 				break;
3285 			case 10:
3286 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3287 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3288 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3289 						 NUM_BANKS(ADDR_SURF_16_BANK));
3290 				break;
3291 			case 11:
3292 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3293 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3294 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295 						 NUM_BANKS(ADDR_SURF_16_BANK));
3296 				break;
3297 			case 12:
3298 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3299 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3300 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3301 						 NUM_BANKS(ADDR_SURF_16_BANK));
3302 				break;
3303 			case 13:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 14:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3313 						 NUM_BANKS(ADDR_SURF_8_BANK));
3314 				break;
3315 			default:
3316 				gb_tile_moden = 0;
3317 				break;
3318 			}
3319 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3320 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3321 		}
3322 	} else
3323 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3324 }
3325 
3326 /**
3327  * cik_select_se_sh - select which SE, SH to address
3328  *
3329  * @rdev: radeon_device pointer
3330  * @se_num: shader engine to address
3331  * @sh_num: sh block to address
3332  *
3333  * Select which SE, SH combinations to address. Certain
3334  * registers are instanced per SE or SH.  0xffffffff means
3335  * broadcast to all SEs or SHs (CIK).
3336  */
3337 static void cik_select_se_sh(struct radeon_device *rdev,
3338 			     u32 se_num, u32 sh_num)
3339 {
3340 	u32 data = INSTANCE_BROADCAST_WRITES;
3341 
3342 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3343 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3344 	else if (se_num == 0xffffffff)
3345 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3346 	else if (sh_num == 0xffffffff)
3347 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3348 	else
3349 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3350 	WREG32(GRBM_GFX_INDEX, data);
3351 }
3352 
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: number of low-order bits to set
 *
 * Builds a mask with the lowest @bit_width bits set (CIK).
 * A width of 32 or more yields 0xffffffff.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/*
	 * Shifting a 32-bit value by 32 or more is undefined in C, so the
	 * saturated case is answered directly.
	 */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3371 
3372 /**
3373  * cik_get_rb_disabled - computes the mask of disabled RBs
3374  *
3375  * @rdev: radeon_device pointer
3376  * @max_rb_num: max RBs (render backends) for the asic
3377  * @se_num: number of SEs (shader engines) for the asic
3378  * @sh_per_se: number of SH blocks per SE for the asic
3379  *
3380  * Calculates the bitmask of disabled RBs (CIK).
3381  * Returns the disabled RB bitmask.
3382  */
3383 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3384 			      u32 max_rb_num_per_se,
3385 			      u32 sh_per_se)
3386 {
3387 	u32 data, mask;
3388 
3389 	data = RREG32(CC_RB_BACKEND_DISABLE);
3390 	if (data & 1)
3391 		data &= BACKEND_DISABLE_MASK;
3392 	else
3393 		data = 0;
3394 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3395 
3396 	data >>= BACKEND_DISABLE_SHIFT;
3397 
3398 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3399 
3400 	return data & mask;
3401 }
3402 
3403 /**
3404  * cik_setup_rb - setup the RBs on the asic
3405  *
3406  * @rdev: radeon_device pointer
3407  * @se_num: number of SEs (shader engines) for the asic
3408  * @sh_per_se: number of SH blocks per SE for the asic
3409  * @max_rb_num: max RBs (render backends) for the asic
3410  *
3411  * Configures per-SE/SH RB registers (CIK).
3412  */
3413 static void cik_setup_rb(struct radeon_device *rdev,
3414 			 u32 se_num, u32 sh_per_se,
3415 			 u32 max_rb_num_per_se)
3416 {
3417 	int i, j;
3418 	u32 data, mask;
3419 	u32 disabled_rbs = 0;
3420 	u32 enabled_rbs = 0;
3421 
3422 	for (i = 0; i < se_num; i++) {
3423 		for (j = 0; j < sh_per_se; j++) {
3424 			cik_select_se_sh(rdev, i, j);
3425 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3426 			if (rdev->family == CHIP_HAWAII)
3427 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3428 			else
3429 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3430 		}
3431 	}
3432 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3433 
3434 	mask = 1;
3435 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3436 		if (!(disabled_rbs & mask))
3437 			enabled_rbs |= mask;
3438 		mask <<= 1;
3439 	}
3440 
3441 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3442 
3443 	for (i = 0; i < se_num; i++) {
3444 		cik_select_se_sh(rdev, i, 0xffffffff);
3445 		data = 0;
3446 		for (j = 0; j < sh_per_se; j++) {
3447 			switch (enabled_rbs & 3) {
3448 			case 0:
3449 				if (j == 0)
3450 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3451 				else
3452 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3453 				break;
3454 			case 1:
3455 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3456 				break;
3457 			case 2:
3458 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3459 				break;
3460 			case 3:
3461 			default:
3462 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3463 				break;
3464 			}
3465 			enabled_rbs >>= 2;
3466 		}
3467 		WREG32(PA_SC_RASTER_CONFIG, data);
3468 	}
3469 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3470 }
3471 
3472 /**
3473  * cik_gpu_init - setup the 3D engine
3474  *
3475  * @rdev: radeon_device pointer
3476  *
3477  * Configures the 3D engine and tiling configuration
3478  * registers so that the 3D engine is usable.
3479  */
3480 static void cik_gpu_init(struct radeon_device *rdev)
3481 {
3482 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3483 	u32 mc_shared_chmap, mc_arb_ramcfg;
3484 	u32 hdp_host_path_cntl;
3485 	u32 tmp;
3486 	int i, j;
3487 
3488 	switch (rdev->family) {
3489 	case CHIP_BONAIRE:
3490 		rdev->config.cik.max_shader_engines = 2;
3491 		rdev->config.cik.max_tile_pipes = 4;
3492 		rdev->config.cik.max_cu_per_sh = 7;
3493 		rdev->config.cik.max_sh_per_se = 1;
3494 		rdev->config.cik.max_backends_per_se = 2;
3495 		rdev->config.cik.max_texture_channel_caches = 4;
3496 		rdev->config.cik.max_gprs = 256;
3497 		rdev->config.cik.max_gs_threads = 32;
3498 		rdev->config.cik.max_hw_contexts = 8;
3499 
3500 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3501 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3502 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3503 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3504 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3505 		break;
3506 	case CHIP_HAWAII:
3507 		rdev->config.cik.max_shader_engines = 4;
3508 		rdev->config.cik.max_tile_pipes = 16;
3509 		rdev->config.cik.max_cu_per_sh = 11;
3510 		rdev->config.cik.max_sh_per_se = 1;
3511 		rdev->config.cik.max_backends_per_se = 4;
3512 		rdev->config.cik.max_texture_channel_caches = 16;
3513 		rdev->config.cik.max_gprs = 256;
3514 		rdev->config.cik.max_gs_threads = 32;
3515 		rdev->config.cik.max_hw_contexts = 8;
3516 
3517 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3518 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3519 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3520 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3521 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3522 		break;
3523 	case CHIP_KAVERI:
3524 		rdev->config.cik.max_shader_engines = 1;
3525 		rdev->config.cik.max_tile_pipes = 4;
3526 		if ((rdev->pdev->device == 0x1304) ||
3527 		    (rdev->pdev->device == 0x1305) ||
3528 		    (rdev->pdev->device == 0x130C) ||
3529 		    (rdev->pdev->device == 0x130F) ||
3530 		    (rdev->pdev->device == 0x1310) ||
3531 		    (rdev->pdev->device == 0x1311) ||
3532 		    (rdev->pdev->device == 0x131C)) {
3533 			rdev->config.cik.max_cu_per_sh = 8;
3534 			rdev->config.cik.max_backends_per_se = 2;
3535 		} else if ((rdev->pdev->device == 0x1309) ||
3536 			   (rdev->pdev->device == 0x130A) ||
3537 			   (rdev->pdev->device == 0x130D) ||
3538 			   (rdev->pdev->device == 0x1313) ||
3539 			   (rdev->pdev->device == 0x131D)) {
3540 			rdev->config.cik.max_cu_per_sh = 6;
3541 			rdev->config.cik.max_backends_per_se = 2;
3542 		} else if ((rdev->pdev->device == 0x1306) ||
3543 			   (rdev->pdev->device == 0x1307) ||
3544 			   (rdev->pdev->device == 0x130B) ||
3545 			   (rdev->pdev->device == 0x130E) ||
3546 			   (rdev->pdev->device == 0x1315) ||
3547 			   (rdev->pdev->device == 0x1318) ||
3548 			   (rdev->pdev->device == 0x131B)) {
3549 			rdev->config.cik.max_cu_per_sh = 4;
3550 			rdev->config.cik.max_backends_per_se = 1;
3551 		} else {
3552 			rdev->config.cik.max_cu_per_sh = 3;
3553 			rdev->config.cik.max_backends_per_se = 1;
3554 		}
3555 		rdev->config.cik.max_sh_per_se = 1;
3556 		rdev->config.cik.max_texture_channel_caches = 4;
3557 		rdev->config.cik.max_gprs = 256;
3558 		rdev->config.cik.max_gs_threads = 16;
3559 		rdev->config.cik.max_hw_contexts = 8;
3560 
3561 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3562 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3563 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3564 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3565 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3566 		break;
3567 	case CHIP_KABINI:
3568 	case CHIP_MULLINS:
3569 	default:
3570 		rdev->config.cik.max_shader_engines = 1;
3571 		rdev->config.cik.max_tile_pipes = 2;
3572 		rdev->config.cik.max_cu_per_sh = 2;
3573 		rdev->config.cik.max_sh_per_se = 1;
3574 		rdev->config.cik.max_backends_per_se = 1;
3575 		rdev->config.cik.max_texture_channel_caches = 2;
3576 		rdev->config.cik.max_gprs = 256;
3577 		rdev->config.cik.max_gs_threads = 16;
3578 		rdev->config.cik.max_hw_contexts = 8;
3579 
3580 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3581 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3582 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3583 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3584 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3585 		break;
3586 	}
3587 
3588 	/* Initialize HDP */
3589 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3590 		WREG32((0x2c14 + j), 0x00000000);
3591 		WREG32((0x2c18 + j), 0x00000000);
3592 		WREG32((0x2c1c + j), 0x00000000);
3593 		WREG32((0x2c20 + j), 0x00000000);
3594 		WREG32((0x2c24 + j), 0x00000000);
3595 	}
3596 
3597 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3598 
3599 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3600 
3601 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3602 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3603 
3604 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3605 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3606 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3607 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3608 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3609 		rdev->config.cik.mem_row_size_in_kb = 4;
3610 	/* XXX use MC settings? */
3611 	rdev->config.cik.shader_engine_tile_size = 32;
3612 	rdev->config.cik.num_gpus = 1;
3613 	rdev->config.cik.multi_gpu_tile_size = 64;
3614 
3615 	/* fix up row size */
3616 	gb_addr_config &= ~ROW_SIZE_MASK;
3617 	switch (rdev->config.cik.mem_row_size_in_kb) {
3618 	case 1:
3619 	default:
3620 		gb_addr_config |= ROW_SIZE(0);
3621 		break;
3622 	case 2:
3623 		gb_addr_config |= ROW_SIZE(1);
3624 		break;
3625 	case 4:
3626 		gb_addr_config |= ROW_SIZE(2);
3627 		break;
3628 	}
3629 
3630 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3631 	 * not have bank info, so create a custom tiling dword.
3632 	 * bits 3:0   num_pipes
3633 	 * bits 7:4   num_banks
3634 	 * bits 11:8  group_size
3635 	 * bits 15:12 row_size
3636 	 */
3637 	rdev->config.cik.tile_config = 0;
3638 	switch (rdev->config.cik.num_tile_pipes) {
3639 	case 1:
3640 		rdev->config.cik.tile_config |= (0 << 0);
3641 		break;
3642 	case 2:
3643 		rdev->config.cik.tile_config |= (1 << 0);
3644 		break;
3645 	case 4:
3646 		rdev->config.cik.tile_config |= (2 << 0);
3647 		break;
3648 	case 8:
3649 	default:
3650 		/* XXX what about 12? */
3651 		rdev->config.cik.tile_config |= (3 << 0);
3652 		break;
3653 	}
3654 	rdev->config.cik.tile_config |=
3655 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3656 	rdev->config.cik.tile_config |=
3657 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3658 	rdev->config.cik.tile_config |=
3659 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3660 
3661 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3662 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3663 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3664 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3665 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3666 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3667 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3668 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3669 
3670 	cik_tiling_mode_table_init(rdev);
3671 
3672 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3673 		     rdev->config.cik.max_sh_per_se,
3674 		     rdev->config.cik.max_backends_per_se);
3675 
3676 	rdev->config.cik.active_cus = 0;
3677 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3678 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3679 			rdev->config.cik.active_cus +=
3680 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3681 		}
3682 	}
3683 
3684 	/* set HW defaults for 3D engine */
3685 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3686 
3687 	WREG32(SX_DEBUG_1, 0x20);
3688 
3689 	WREG32(TA_CNTL_AUX, 0x00010000);
3690 
3691 	tmp = RREG32(SPI_CONFIG_CNTL);
3692 	tmp |= 0x03000000;
3693 	WREG32(SPI_CONFIG_CNTL, tmp);
3694 
3695 	WREG32(SQ_CONFIG, 1);
3696 
3697 	WREG32(DB_DEBUG, 0);
3698 
3699 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3700 	tmp |= 0x00000400;
3701 	WREG32(DB_DEBUG2, tmp);
3702 
3703 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3704 	tmp |= 0x00020200;
3705 	WREG32(DB_DEBUG3, tmp);
3706 
3707 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3708 	tmp |= 0x00018208;
3709 	WREG32(CB_HW_CONTROL, tmp);
3710 
3711 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3712 
3713 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3714 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3715 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3716 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3717 
3718 	WREG32(VGT_NUM_INSTANCES, 1);
3719 
3720 	WREG32(CP_PERFMON_CNTL, 0);
3721 
3722 	WREG32(SQ_CONFIG, 0);
3723 
3724 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3725 					  FORCE_EOV_MAX_REZ_CNT(255)));
3726 
3727 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3728 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3729 
3730 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3731 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3732 
3733 	tmp = RREG32(HDP_MISC_CNTL);
3734 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3735 	WREG32(HDP_MISC_CNTL, tmp);
3736 
3737 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3738 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3739 
3740 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3741 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3742 
3743 	udelay(50);
3744 }
3745 
/*
 * GPU scratch register helper functions.
 */
3749 /**
3750  * cik_scratch_init - setup driver info for CP scratch regs
3751  *
3752  * @rdev: radeon_device pointer
3753  *
3754  * Set up the number and offset of the CP scratch registers.
3755  * NOTE: use of CP scratch registers is a legacy inferface and
3756  * is not used by default on newer asics (r6xx+).  On newer asics,
3757  * memory buffers are used for fences rather than scratch regs.
3758  */
3759 static void cik_scratch_init(struct radeon_device *rdev)
3760 {
3761 	int i;
3762 
3763 	rdev->scratch.num_reg = 7;
3764 	rdev->scratch.reg_base = SCRATCH_REG0;
3765 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3766 		rdev->scratch.free[i] = true;
3767 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3768 	}
3769 }
3770 
3771 /**
3772  * cik_ring_test - basic gfx ring test
3773  *
3774  * @rdev: radeon_device pointer
3775  * @ring: radeon_ring structure holding ring information
3776  *
3777  * Allocate a scratch register and write to it using the gfx ring (CIK).
3778  * Provides a basic gfx ring test to verify that the ring is working.
3779  * Used by cik_cp_gfx_resume();
3780  * Returns 0 on success, error on failure.
3781  */
3782 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3783 {
3784 	uint32_t scratch;
3785 	uint32_t tmp = 0;
3786 	unsigned i;
3787 	int r;
3788 
3789 	r = radeon_scratch_get(rdev, &scratch);
3790 	if (r) {
3791 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3792 		return r;
3793 	}
3794 	WREG32(scratch, 0xCAFEDEAD);
3795 	r = radeon_ring_lock(rdev, ring, 3);
3796 	if (r) {
3797 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3798 		radeon_scratch_free(rdev, scratch);
3799 		return r;
3800 	}
3801 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3802 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3803 	radeon_ring_write(ring, 0xDEADBEEF);
3804 	radeon_ring_unlock_commit(rdev, ring, false);
3805 
3806 	for (i = 0; i < rdev->usec_timeout; i++) {
3807 		tmp = RREG32(scratch);
3808 		if (tmp == 0xDEADBEEF)
3809 			break;
3810 		DRM_UDELAY(1);
3811 	}
3812 	if (i < rdev->usec_timeout) {
3813 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3814 	} else {
3815 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3816 			  ring->idx, scratch, tmp);
3817 		r = -EINVAL;
3818 	}
3819 	radeon_scratch_free(rdev, scratch);
3820 	return r;
3821 }
3822 
3823 /**
3824  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3825  *
3826  * @rdev: radeon_device pointer
3827  * @ridx: radeon ring index
3828  *
3829  * Emits an hdp flush on the cp.
3830  */
3831 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3832 				       int ridx)
3833 {
3834 	struct radeon_ring *ring = &rdev->ring[ridx];
3835 	u32 ref_and_mask;
3836 
3837 	switch (ring->idx) {
3838 	case CAYMAN_RING_TYPE_CP1_INDEX:
3839 	case CAYMAN_RING_TYPE_CP2_INDEX:
3840 	default:
3841 		switch (ring->me) {
3842 		case 0:
3843 			ref_and_mask = CP2 << ring->pipe;
3844 			break;
3845 		case 1:
3846 			ref_and_mask = CP6 << ring->pipe;
3847 			break;
3848 		default:
3849 			return;
3850 		}
3851 		break;
3852 	case RADEON_RING_TYPE_GFX_INDEX:
3853 		ref_and_mask = CP0;
3854 		break;
3855 	}
3856 
3857 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3858 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3859 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3860 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3861 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3862 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3863 	radeon_ring_write(ring, ref_and_mask);
3864 	radeon_ring_write(ring, ref_and_mask);
3865 	radeon_ring_write(ring, 0x20); /* poll interval */
3866 }
3867 
3868 /**
3869  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3870  *
3871  * @rdev: radeon_device pointer
3872  * @fence: radeon fence object
3873  *
3874  * Emits a fence sequnce number on the gfx ring and flushes
3875  * GPU caches.
3876  */
3877 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3878 			     struct radeon_fence *fence)
3879 {
3880 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3881 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3882 
3883 	/* EVENT_WRITE_EOP - flush caches, send int */
3884 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3885 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3886 				 EOP_TC_ACTION_EN |
3887 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3888 				 EVENT_INDEX(5)));
3889 	radeon_ring_write(ring, addr & 0xfffffffc);
3890 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3891 	radeon_ring_write(ring, fence->seq);
3892 	radeon_ring_write(ring, 0);
3893 }
3894 
3895 /**
3896  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3897  *
3898  * @rdev: radeon_device pointer
3899  * @fence: radeon fence object
3900  *
3901  * Emits a fence sequnce number on the compute ring and flushes
3902  * GPU caches.
3903  */
3904 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3905 				 struct radeon_fence *fence)
3906 {
3907 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3908 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3909 
3910 	/* RELEASE_MEM - flush caches, send int */
3911 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3912 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3913 				 EOP_TC_ACTION_EN |
3914 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3915 				 EVENT_INDEX(5)));
3916 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3917 	radeon_ring_write(ring, addr & 0xfffffffc);
3918 	radeon_ring_write(ring, upper_32_bits(addr));
3919 	radeon_ring_write(ring, fence->seq);
3920 	radeon_ring_write(ring, 0);
3921 }
3922 
3923 /**
3924  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3925  *
3926  * @rdev: radeon_device pointer
3927  * @ring: radeon ring buffer object
3928  * @semaphore: radeon semaphore object
3929  * @emit_wait: Is this a sempahore wait?
3930  *
3931  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3932  * from running ahead of semaphore waits.
3933  */
3934 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3935 			     struct radeon_ring *ring,
3936 			     struct radeon_semaphore *semaphore,
3937 			     bool emit_wait)
3938 {
3939 	uint64_t addr = semaphore->gpu_addr;
3940 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3941 
3942 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3943 	radeon_ring_write(ring, lower_32_bits(addr));
3944 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3945 
3946 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3947 		/* Prevent the PFP from running ahead of the semaphore wait */
3948 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3949 		radeon_ring_write(ring, 0x0);
3950 	}
3951 
3952 	return true;
3953 }
3954 
3955 /**
3956  * cik_copy_cpdma - copy pages using the CP DMA engine
3957  *
3958  * @rdev: radeon_device pointer
3959  * @src_offset: src GPU address
3960  * @dst_offset: dst GPU address
3961  * @num_gpu_pages: number of GPU pages to xfer
3962  * @resv: reservation object to sync to
3963  *
3964  * Copy GPU paging using the CP DMA engine (CIK+).
3965  * Used by the radeon ttm implementation to move pages if
3966  * registered as the asic copy callback.
3967  */
3968 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3969 				    uint64_t src_offset, uint64_t dst_offset,
3970 				    unsigned num_gpu_pages,
3971 				    struct reservation_object *resv)
3972 {
3973 	struct radeon_semaphore *sem = NULL;
3974 	struct radeon_fence *fence;
3975 	int ring_index = rdev->asic->copy.blit_ring_index;
3976 	struct radeon_ring *ring = &rdev->ring[ring_index];
3977 	u32 size_in_bytes, cur_size_in_bytes, control;
3978 	int i, num_loops;
3979 	int r = 0;
3980 
3981 	r = radeon_semaphore_create(rdev, &sem);
3982 	if (r) {
3983 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3984 		return ERR_PTR(r);
3985 	}
3986 
3987 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3988 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3989 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3990 	if (r) {
3991 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3992 		radeon_semaphore_free(rdev, &sem, NULL);
3993 		return ERR_PTR(r);
3994 	}
3995 
3996 	radeon_semaphore_sync_resv(rdev, sem, resv, false);
3997 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3998 
3999 	for (i = 0; i < num_loops; i++) {
4000 		cur_size_in_bytes = size_in_bytes;
4001 		if (cur_size_in_bytes > 0x1fffff)
4002 			cur_size_in_bytes = 0x1fffff;
4003 		size_in_bytes -= cur_size_in_bytes;
4004 		control = 0;
4005 		if (size_in_bytes == 0)
4006 			control |= PACKET3_DMA_DATA_CP_SYNC;
4007 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4008 		radeon_ring_write(ring, control);
4009 		radeon_ring_write(ring, lower_32_bits(src_offset));
4010 		radeon_ring_write(ring, upper_32_bits(src_offset));
4011 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4012 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4013 		radeon_ring_write(ring, cur_size_in_bytes);
4014 		src_offset += cur_size_in_bytes;
4015 		dst_offset += cur_size_in_bytes;
4016 	}
4017 
4018 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4019 	if (r) {
4020 		radeon_ring_unlock_undo(rdev, ring);
4021 		radeon_semaphore_free(rdev, &sem, NULL);
4022 		return ERR_PTR(r);
4023 	}
4024 
4025 	radeon_ring_unlock_commit(rdev, ring, false);
4026 	radeon_semaphore_free(rdev, &sem, fence);
4027 
4028 	return fence;
4029 }
4030 
4031 /*
4032  * IB stuff
4033  */
4034 /**
4035  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4036  *
4037  * @rdev: radeon_device pointer
4038  * @ib: radeon indirect buffer object
4039  *
4040  * Emits an DE (drawing engine) or CE (constant engine) IB
4041  * on the gfx ring.  IBs are usually generated by userspace
4042  * acceleration drivers and submitted to the kernel for
4043  * sheduling on the ring.  This function schedules the IB
4044  * on the gfx ring for execution by the GPU.
4045  */
4046 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4047 {
4048 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4049 	u32 header, control = INDIRECT_BUFFER_VALID;
4050 
4051 	if (ib->is_const_ib) {
4052 		/* set switch buffer packet before const IB */
4053 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4054 		radeon_ring_write(ring, 0);
4055 
4056 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4057 	} else {
4058 		u32 next_rptr;
4059 		if (ring->rptr_save_reg) {
4060 			next_rptr = ring->wptr + 3 + 4;
4061 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4062 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4063 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4064 			radeon_ring_write(ring, next_rptr);
4065 		} else if (rdev->wb.enabled) {
4066 			next_rptr = ring->wptr + 5 + 4;
4067 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4068 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4069 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4070 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4071 			radeon_ring_write(ring, next_rptr);
4072 		}
4073 
4074 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4075 	}
4076 
4077 	control |= ib->length_dw |
4078 		(ib->vm ? (ib->vm->id << 24) : 0);
4079 
4080 	radeon_ring_write(ring, header);
4081 	radeon_ring_write(ring,
4082 #ifdef __BIG_ENDIAN
4083 			  (2 << 0) |
4084 #endif
4085 			  (ib->gpu_addr & 0xFFFFFFFC));
4086 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4087 	radeon_ring_write(ring, control);
4088 }
4089 
4090 /**
4091  * cik_ib_test - basic gfx ring IB test
4092  *
4093  * @rdev: radeon_device pointer
4094  * @ring: radeon_ring structure holding ring information
4095  *
4096  * Allocate an IB and execute it on the gfx ring (CIK).
4097  * Provides a basic gfx ring test to verify that IBs are working.
4098  * Returns 0 on success, error on failure.
4099  */
4100 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4101 {
4102 	struct radeon_ib ib;
4103 	uint32_t scratch;
4104 	uint32_t tmp = 0;
4105 	unsigned i;
4106 	int r;
4107 
4108 	r = radeon_scratch_get(rdev, &scratch);
4109 	if (r) {
4110 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4111 		return r;
4112 	}
4113 	WREG32(scratch, 0xCAFEDEAD);
4114 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4115 	if (r) {
4116 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4117 		radeon_scratch_free(rdev, scratch);
4118 		return r;
4119 	}
4120 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4121 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4122 	ib.ptr[2] = 0xDEADBEEF;
4123 	ib.length_dw = 3;
4124 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4125 	if (r) {
4126 		radeon_scratch_free(rdev, scratch);
4127 		radeon_ib_free(rdev, &ib);
4128 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4129 		return r;
4130 	}
4131 	r = radeon_fence_wait(ib.fence, false);
4132 	if (r) {
4133 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4134 		radeon_scratch_free(rdev, scratch);
4135 		radeon_ib_free(rdev, &ib);
4136 		return r;
4137 	}
4138 	for (i = 0; i < rdev->usec_timeout; i++) {
4139 		tmp = RREG32(scratch);
4140 		if (tmp == 0xDEADBEEF)
4141 			break;
4142 		DRM_UDELAY(1);
4143 	}
4144 	if (i < rdev->usec_timeout) {
4145 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4146 	} else {
4147 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4148 			  scratch, tmp);
4149 		r = -EINVAL;
4150 	}
4151 	radeon_scratch_free(rdev, scratch);
4152 	radeon_ib_free(rdev, &ib);
4153 	return r;
4154 }
4155 
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
4179 /**
4180  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4181  *
4182  * @rdev: radeon_device pointer
4183  * @enable: enable or disable the MEs
4184  *
4185  * Halts or unhalts the gfx MEs.
4186  */
4187 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4188 {
4189 	if (enable)
4190 		WREG32(CP_ME_CNTL, 0);
4191 	else {
4192 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4193 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4194 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4195 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4196 	}
4197 	udelay(50);
4198 }
4199 
4200 /**
4201  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4202  *
4203  * @rdev: radeon_device pointer
4204  *
4205  * Loads the gfx PFP, ME, and CE ucode.
4206  * Returns 0 for success, -EINVAL if the ucode is not available.
4207  */
4208 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4209 {
4210 	int i;
4211 
4212 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4213 		return -EINVAL;
4214 
4215 	cik_cp_gfx_enable(rdev, false);
4216 
4217 	if (rdev->new_fw) {
4218 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4219 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4220 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4221 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4222 		const struct gfx_firmware_header_v1_0 *me_hdr =
4223 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4224 		const __le32 *fw_data;
4225 		u32 fw_size;
4226 
4227 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4228 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4229 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4230 
4231 		/* PFP */
4232 		fw_data = (const __le32 *)
4233 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4234 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4235 		WREG32(CP_PFP_UCODE_ADDR, 0);
4236 		for (i = 0; i < fw_size; i++)
4237 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4238 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4239 
4240 		/* CE */
4241 		fw_data = (const __le32 *)
4242 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4243 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4244 		WREG32(CP_CE_UCODE_ADDR, 0);
4245 		for (i = 0; i < fw_size; i++)
4246 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4247 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4248 
4249 		/* ME */
4250 		fw_data = (const __be32 *)
4251 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4252 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4253 		WREG32(CP_ME_RAM_WADDR, 0);
4254 		for (i = 0; i < fw_size; i++)
4255 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4256 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4257 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4258 	} else {
4259 		const __be32 *fw_data;
4260 
4261 		/* PFP */
4262 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4263 		WREG32(CP_PFP_UCODE_ADDR, 0);
4264 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4265 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4266 		WREG32(CP_PFP_UCODE_ADDR, 0);
4267 
4268 		/* CE */
4269 		fw_data = (const __be32 *)rdev->ce_fw->data;
4270 		WREG32(CP_CE_UCODE_ADDR, 0);
4271 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4272 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4273 		WREG32(CP_CE_UCODE_ADDR, 0);
4274 
4275 		/* ME */
4276 		fw_data = (const __be32 *)rdev->me_fw->data;
4277 		WREG32(CP_ME_RAM_WADDR, 0);
4278 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4279 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4280 		WREG32(CP_ME_RAM_WADDR, 0);
4281 	}
4282 
4283 	return 0;
4284 }
4285 
4286 /**
4287  * cik_cp_gfx_start - start the gfx ring
4288  *
4289  * @rdev: radeon_device pointer
4290  *
4291  * Enables the ring and loads the clear state context and other
4292  * packets required to init the ring.
4293  * Returns 0 for success, error for failure.
4294  */
4295 static int cik_cp_gfx_start(struct radeon_device *rdev)
4296 {
4297 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4298 	int r, i;
4299 
4300 	/* init the CP */
4301 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4302 	WREG32(CP_ENDIAN_SWAP, 0);
4303 	WREG32(CP_DEVICE_ID, 1);
4304 
4305 	cik_cp_gfx_enable(rdev, true);
4306 
4307 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4308 	if (r) {
4309 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4310 		return r;
4311 	}
4312 
4313 	/* init the CE partitions.  CE only used for gfx on CIK */
4314 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4315 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4316 	radeon_ring_write(ring, 0xc000);
4317 	radeon_ring_write(ring, 0xc000);
4318 
4319 	/* setup clear context state */
4320 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4321 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4322 
4323 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4324 	radeon_ring_write(ring, 0x80000000);
4325 	radeon_ring_write(ring, 0x80000000);
4326 
4327 	for (i = 0; i < cik_default_size; i++)
4328 		radeon_ring_write(ring, cik_default_state[i]);
4329 
4330 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4331 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4332 
4333 	/* set clear context state */
4334 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4335 	radeon_ring_write(ring, 0);
4336 
4337 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4338 	radeon_ring_write(ring, 0x00000316);
4339 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4340 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4341 
4342 	radeon_ring_unlock_commit(rdev, ring, false);
4343 
4344 	return 0;
4345 }
4346 
4347 /**
4348  * cik_cp_gfx_fini - stop the gfx ring
4349  *
4350  * @rdev: radeon_device pointer
4351  *
4352  * Stop the gfx ring and tear down the driver ring
4353  * info.
4354  */
4355 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4356 {
4357 	cik_cp_gfx_enable(rdev, false);
4358 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4359 }
4360 
4361 /**
4362  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4363  *
4364  * @rdev: radeon_device pointer
4365  *
4366  * Program the location and size of the gfx ring buffer
4367  * and test it to make sure it's working.
4368  * Returns 0 for success, error for failure.
4369  */
4370 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4371 {
4372 	struct radeon_ring *ring;
4373 	u32 tmp;
4374 	u32 rb_bufsz;
4375 	u64 rb_addr;
4376 	int r;
4377 
4378 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4379 	if (rdev->family != CHIP_HAWAII)
4380 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4381 
4382 	/* Set the write pointer delay */
4383 	WREG32(CP_RB_WPTR_DELAY, 0);
4384 
4385 	/* set the RB to use vmid 0 */
4386 	WREG32(CP_RB_VMID, 0);
4387 
4388 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4389 
4390 	/* ring 0 - compute and gfx */
4391 	/* Set ring buffer size */
4392 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4393 	rb_bufsz = order_base_2(ring->ring_size / 8);
4394 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4395 #ifdef __BIG_ENDIAN
4396 	tmp |= BUF_SWAP_32BIT;
4397 #endif
4398 	WREG32(CP_RB0_CNTL, tmp);
4399 
4400 	/* Initialize the ring buffer's read and write pointers */
4401 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4402 	ring->wptr = 0;
4403 	WREG32(CP_RB0_WPTR, ring->wptr);
4404 
4405 	/* set the wb address wether it's enabled or not */
4406 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4407 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4408 
4409 	/* scratch register shadowing is no longer supported */
4410 	WREG32(SCRATCH_UMSK, 0);
4411 
4412 	if (!rdev->wb.enabled)
4413 		tmp |= RB_NO_UPDATE;
4414 
4415 	mdelay(1);
4416 	WREG32(CP_RB0_CNTL, tmp);
4417 
4418 	rb_addr = ring->gpu_addr >> 8;
4419 	WREG32(CP_RB0_BASE, rb_addr);
4420 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4421 
4422 	/* start the ring */
4423 	cik_cp_gfx_start(rdev);
4424 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4425 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4426 	if (r) {
4427 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4428 		return r;
4429 	}
4430 
4431 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4432 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4433 
4434 	return 0;
4435 }
4436 
4437 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4438 		     struct radeon_ring *ring)
4439 {
4440 	u32 rptr;
4441 
4442 	if (rdev->wb.enabled)
4443 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4444 	else
4445 		rptr = RREG32(CP_RB0_RPTR);
4446 
4447 	return rptr;
4448 }
4449 
4450 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4451 		     struct radeon_ring *ring)
4452 {
4453 	u32 wptr;
4454 
4455 	wptr = RREG32(CP_RB0_WPTR);
4456 
4457 	return wptr;
4458 }
4459 
4460 void cik_gfx_set_wptr(struct radeon_device *rdev,
4461 		      struct radeon_ring *ring)
4462 {
4463 	WREG32(CP_RB0_WPTR, ring->wptr);
4464 	(void)RREG32(CP_RB0_WPTR);
4465 }
4466 
4467 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4468 			 struct radeon_ring *ring)
4469 {
4470 	u32 rptr;
4471 
4472 	if (rdev->wb.enabled) {
4473 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4474 	} else {
4475 		mutex_lock(&rdev->srbm_mutex);
4476 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4477 		rptr = RREG32(CP_HQD_PQ_RPTR);
4478 		cik_srbm_select(rdev, 0, 0, 0, 0);
4479 		mutex_unlock(&rdev->srbm_mutex);
4480 	}
4481 
4482 	return rptr;
4483 }
4484 
4485 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4486 			 struct radeon_ring *ring)
4487 {
4488 	u32 wptr;
4489 
4490 	if (rdev->wb.enabled) {
4491 		/* XXX check if swapping is necessary on BE */
4492 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4493 	} else {
4494 		mutex_lock(&rdev->srbm_mutex);
4495 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4496 		wptr = RREG32(CP_HQD_PQ_WPTR);
4497 		cik_srbm_select(rdev, 0, 0, 0, 0);
4498 		mutex_unlock(&rdev->srbm_mutex);
4499 	}
4500 
4501 	return wptr;
4502 }
4503 
4504 void cik_compute_set_wptr(struct radeon_device *rdev,
4505 			  struct radeon_ring *ring)
4506 {
4507 	/* XXX check if swapping is necessary on BE */
4508 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4509 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4510 }
4511 
4512 /**
4513  * cik_cp_compute_enable - enable/disable the compute CP MEs
4514  *
4515  * @rdev: radeon_device pointer
4516  * @enable: enable or disable the MEs
4517  *
4518  * Halts or unhalts the compute MEs.
4519  */
4520 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4521 {
4522 	if (enable)
4523 		WREG32(CP_MEC_CNTL, 0);
4524 	else {
4525 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4526 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4527 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4528 	}
4529 	udelay(50);
4530 }
4531 
4532 /**
4533  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4534  *
4535  * @rdev: radeon_device pointer
4536  *
4537  * Loads the compute MEC1&2 ucode.
4538  * Returns 0 for success, -EINVAL if the ucode is not available.
4539  */
4540 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4541 {
4542 	int i;
4543 
4544 	if (!rdev->mec_fw)
4545 		return -EINVAL;
4546 
4547 	cik_cp_compute_enable(rdev, false);
4548 
4549 	if (rdev->new_fw) {
4550 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4551 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4552 		const __le32 *fw_data;
4553 		u32 fw_size;
4554 
4555 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4556 
4557 		/* MEC1 */
4558 		fw_data = (const __le32 *)
4559 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4560 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4561 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4562 		for (i = 0; i < fw_size; i++)
4563 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4564 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4565 
4566 		/* MEC2 */
4567 		if (rdev->family == CHIP_KAVERI) {
4568 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4569 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4570 
4571 			fw_data = (const __le32 *)
4572 				(rdev->mec2_fw->data +
4573 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4574 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4575 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4576 			for (i = 0; i < fw_size; i++)
4577 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4578 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4579 		}
4580 	} else {
4581 		const __be32 *fw_data;
4582 
4583 		/* MEC1 */
4584 		fw_data = (const __be32 *)rdev->mec_fw->data;
4585 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4586 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4587 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4588 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4589 
4590 		if (rdev->family == CHIP_KAVERI) {
4591 			/* MEC2 */
4592 			fw_data = (const __be32 *)rdev->mec_fw->data;
4593 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4594 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4595 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4596 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4597 		}
4598 	}
4599 
4600 	return 0;
4601 }
4602 
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.  This is a thin wrapper around
 * cik_cp_compute_enable(rdev, true).
 * Returns 0 for success, error for failure (as written it always
 * returns 0).
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4617 
4618 /**
4619  * cik_cp_compute_fini - stop the compute queues
4620  *
4621  * @rdev: radeon_device pointer
4622  *
4623  * Stop the compute queues and tear down the driver queue
4624  * info.
4625  */
4626 static void cik_cp_compute_fini(struct radeon_device *rdev)
4627 {
4628 	int i, idx, r;
4629 
4630 	cik_cp_compute_enable(rdev, false);
4631 
4632 	for (i = 0; i < 2; i++) {
4633 		if (i == 0)
4634 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4635 		else
4636 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4637 
4638 		if (rdev->ring[idx].mqd_obj) {
4639 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4640 			if (unlikely(r != 0))
4641 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4642 
4643 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4644 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4645 
4646 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4647 			rdev->ring[idx].mqd_obj = NULL;
4648 		}
4649 	}
4650 }
4651 
4652 static void cik_mec_fini(struct radeon_device *rdev)
4653 {
4654 	int r;
4655 
4656 	if (rdev->mec.hpd_eop_obj) {
4657 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4658 		if (unlikely(r != 0))
4659 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4660 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4661 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4662 
4663 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4664 		rdev->mec.hpd_eop_obj = NULL;
4665 	}
4666 }
4667 
4668 #define MEC_HPD_SIZE 2048
4669 
4670 static int cik_mec_init(struct radeon_device *rdev)
4671 {
4672 	int r;
4673 	u32 *hpd;
4674 
4675 	/*
4676 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4677 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4678 	 */
4679 	if (rdev->family == CHIP_KAVERI)
4680 		rdev->mec.num_mec = 2;
4681 	else
4682 		rdev->mec.num_mec = 1;
4683 	rdev->mec.num_pipe = 4;
4684 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4685 
4686 	if (rdev->mec.hpd_eop_obj == NULL) {
4687 		r = radeon_bo_create(rdev,
4688 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4689 				     PAGE_SIZE, true,
4690 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4691 				     &rdev->mec.hpd_eop_obj);
4692 		if (r) {
4693 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4694 			return r;
4695 		}
4696 	}
4697 
4698 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4699 	if (unlikely(r != 0)) {
4700 		cik_mec_fini(rdev);
4701 		return r;
4702 	}
4703 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4704 			  &rdev->mec.hpd_eop_gpu_addr);
4705 	if (r) {
4706 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4707 		cik_mec_fini(rdev);
4708 		return r;
4709 	}
4710 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4711 	if (r) {
4712 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4713 		cik_mec_fini(rdev);
4714 		return r;
4715 	}
4716 
4717 	/* clear memory.  Not sure if this is required or not */
4718 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4719 
4720 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4721 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4722 
4723 	return 0;
4724 }
4725 
/*
 * Per-queue CP_MQD_xxx / CP_HQD_xxx register state.
 *
 * Embedded in struct bonaire_mqd as "queue_state".  cik_cp_compute_resume()
 * stores in these fields the values it programs into the like-named
 * registers (e.g. cp_hqd_pq_base mirrors CP_HQD_PQ_BASE).
 * NOTE(review): the field order presumably follows a layout the CP expects;
 * do not reorder or resize without confirming.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
/*
 * Memory queue descriptor (MQD) for a compute queue.
 *
 * cik_cp_compute_resume() allocates one of these per compute ring in a
 * GTT buffer object, zeroes it, fills in header, static_thread_mgmt01/23
 * and queue_state, and points CP_MQD_BASE_ADDR/_HI at it.
 * NOTE(review): the layout is presumably dictated by the CP; do not
 * reorder or resize fields without confirming.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* shadow of the queue's CP_MQD_xxx / CP_HQD_xxx registers */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4792 
4793 /**
4794  * cik_cp_compute_resume - setup the compute queue registers
4795  *
4796  * @rdev: radeon_device pointer
4797  *
4798  * Program the compute queues and test them to make sure they
4799  * are working.
4800  * Returns 0 for success, error for failure.
4801  */
4802 static int cik_cp_compute_resume(struct radeon_device *rdev)
4803 {
4804 	int r, i, j, idx;
4805 	u32 tmp;
4806 	bool use_doorbell = true;
4807 	u64 hqd_gpu_addr;
4808 	u64 mqd_gpu_addr;
4809 	u64 eop_gpu_addr;
4810 	u64 wb_gpu_addr;
4811 	u32 *buf;
4812 	struct bonaire_mqd *mqd;
4813 
4814 	r = cik_cp_compute_start(rdev);
4815 	if (r)
4816 		return r;
4817 
4818 	/* fix up chicken bits */
4819 	tmp = RREG32(CP_CPF_DEBUG);
4820 	tmp |= (1 << 23);
4821 	WREG32(CP_CPF_DEBUG, tmp);
4822 
4823 	/* init the pipes */
4824 	mutex_lock(&rdev->srbm_mutex);
4825 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4826 		int me = (i < 4) ? 1 : 2;
4827 		int pipe = (i < 4) ? i : (i - 4);
4828 
4829 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4830 
4831 		cik_srbm_select(rdev, me, pipe, 0, 0);
4832 
4833 		/* write the EOP addr */
4834 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4835 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4836 
4837 		/* set the VMID assigned */
4838 		WREG32(CP_HPD_EOP_VMID, 0);
4839 
4840 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4841 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4842 		tmp &= ~EOP_SIZE_MASK;
4843 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4844 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4845 	}
4846 	cik_srbm_select(rdev, 0, 0, 0, 0);
4847 	mutex_unlock(&rdev->srbm_mutex);
4848 
4849 	/* init the queues.  Just two for now. */
4850 	for (i = 0; i < 2; i++) {
4851 		if (i == 0)
4852 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4853 		else
4854 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4855 
4856 		if (rdev->ring[idx].mqd_obj == NULL) {
4857 			r = radeon_bo_create(rdev,
4858 					     sizeof(struct bonaire_mqd),
4859 					     PAGE_SIZE, true,
4860 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4861 					     NULL, &rdev->ring[idx].mqd_obj);
4862 			if (r) {
4863 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4864 				return r;
4865 			}
4866 		}
4867 
4868 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4869 		if (unlikely(r != 0)) {
4870 			cik_cp_compute_fini(rdev);
4871 			return r;
4872 		}
4873 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4874 				  &mqd_gpu_addr);
4875 		if (r) {
4876 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4877 			cik_cp_compute_fini(rdev);
4878 			return r;
4879 		}
4880 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4881 		if (r) {
4882 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4883 			cik_cp_compute_fini(rdev);
4884 			return r;
4885 		}
4886 
4887 		/* init the mqd struct */
4888 		memset(buf, 0, sizeof(struct bonaire_mqd));
4889 
4890 		mqd = (struct bonaire_mqd *)buf;
4891 		mqd->header = 0xC0310800;
4892 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4893 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4894 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4895 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4896 
4897 		mutex_lock(&rdev->srbm_mutex);
4898 		cik_srbm_select(rdev, rdev->ring[idx].me,
4899 				rdev->ring[idx].pipe,
4900 				rdev->ring[idx].queue, 0);
4901 
4902 		/* disable wptr polling */
4903 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4904 		tmp &= ~WPTR_POLL_EN;
4905 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4906 
4907 		/* enable doorbell? */
4908 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4909 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4910 		if (use_doorbell)
4911 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4912 		else
4913 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4914 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4915 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4916 
4917 		/* disable the queue if it's active */
4918 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4919 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4920 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4921 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4922 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4923 			for (j = 0; j < rdev->usec_timeout; j++) {
4924 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4925 					break;
4926 				udelay(1);
4927 			}
4928 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4929 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4930 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4931 		}
4932 
4933 		/* set the pointer to the MQD */
4934 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4935 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4936 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4937 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4938 		/* set MQD vmid to 0 */
4939 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4940 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4941 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4942 
4943 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4944 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4945 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4946 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4947 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4948 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4949 
4950 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4951 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4952 		mqd->queue_state.cp_hqd_pq_control &=
4953 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4954 
4955 		mqd->queue_state.cp_hqd_pq_control |=
4956 			order_base_2(rdev->ring[idx].ring_size / 8);
4957 		mqd->queue_state.cp_hqd_pq_control |=
4958 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4959 #ifdef __BIG_ENDIAN
4960 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4961 #endif
4962 		mqd->queue_state.cp_hqd_pq_control &=
4963 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4964 		mqd->queue_state.cp_hqd_pq_control |=
4965 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4966 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4967 
4968 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4969 		if (i == 0)
4970 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4971 		else
4972 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4973 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4974 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4975 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4976 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4977 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4978 
4979 		/* set the wb address wether it's enabled or not */
4980 		if (i == 0)
4981 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4982 		else
4983 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4984 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4985 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4986 			upper_32_bits(wb_gpu_addr) & 0xffff;
4987 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4988 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4989 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4990 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4991 
4992 		/* enable the doorbell if requested */
4993 		if (use_doorbell) {
4994 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4995 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4996 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4997 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4998 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4999 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5000 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5001 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5002 
5003 		} else {
5004 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5005 		}
5006 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5007 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5008 
5009 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5010 		rdev->ring[idx].wptr = 0;
5011 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5012 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5013 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5014 
5015 		/* set the vmid for the queue */
5016 		mqd->queue_state.cp_hqd_vmid = 0;
5017 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5018 
5019 		/* activate the queue */
5020 		mqd->queue_state.cp_hqd_active = 1;
5021 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5022 
5023 		cik_srbm_select(rdev, 0, 0, 0, 0);
5024 		mutex_unlock(&rdev->srbm_mutex);
5025 
5026 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5027 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5028 
5029 		rdev->ring[idx].ready = true;
5030 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5031 		if (r)
5032 			rdev->ring[idx].ready = false;
5033 	}
5034 
5035 	return 0;
5036 }
5037 
/*
 * cik_cp_enable - enable or disable both halves of the CP
 *
 * Forwards @enable to the gfx CP first and then to the compute CP.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5043 
/*
 * cik_cp_load_microcode - load the gfx and then the compute CP ucode
 *
 * Stops at the first loader that fails and returns its error code;
 * returns 0 when both loads succeed.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
5057 
/*
 * cik_cp_fini - tear down the CP
 *
 * Runs the gfx teardown first, then the compute teardown.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5063 
5064 static int cik_cp_resume(struct radeon_device *rdev)
5065 {
5066 	int r;
5067 
5068 	cik_enable_gui_idle_interrupt(rdev, false);
5069 
5070 	r = cik_cp_load_microcode(rdev);
5071 	if (r)
5072 		return r;
5073 
5074 	r = cik_cp_gfx_resume(rdev);
5075 	if (r)
5076 		return r;
5077 	r = cik_cp_compute_resume(rdev);
5078 	if (r)
5079 		return r;
5080 
5081 	cik_enable_gui_idle_interrupt(rdev, true);
5082 
5083 	return 0;
5084 }
5085 
5086 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5087 {
5088 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5089 		RREG32(GRBM_STATUS));
5090 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5091 		RREG32(GRBM_STATUS2));
5092 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5093 		RREG32(GRBM_STATUS_SE0));
5094 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5095 		RREG32(GRBM_STATUS_SE1));
5096 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5097 		RREG32(GRBM_STATUS_SE2));
5098 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5099 		RREG32(GRBM_STATUS_SE3));
5100 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5101 		RREG32(SRBM_STATUS));
5102 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5103 		RREG32(SRBM_STATUS2));
5104 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5105 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5106 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5107 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5108 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5109 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5110 		 RREG32(CP_STALLED_STAT1));
5111 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5112 		 RREG32(CP_STALLED_STAT2));
5113 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5114 		 RREG32(CP_STALLED_STAT3));
5115 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5116 		 RREG32(CP_CPF_BUSY_STAT));
5117 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5118 		 RREG32(CP_CPF_STALLED_STAT1));
5119 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5120 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5121 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5122 		 RREG32(CP_CPC_STALLED_STAT1));
5123 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5124 }
5125 
5126 /**
5127  * cik_gpu_check_soft_reset - check which blocks are busy
5128  *
5129  * @rdev: radeon_device pointer
5130  *
5131  * Check which blocks are busy and return the relevant reset
5132  * mask to be used by cik_gpu_soft_reset().
5133  * Returns a mask of the blocks to be reset.
5134  */
5135 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5136 {
5137 	u32 reset_mask = 0;
5138 	u32 tmp;
5139 
5140 	/* GRBM_STATUS */
5141 	tmp = RREG32(GRBM_STATUS);
5142 	if (tmp & (PA_BUSY | SC_BUSY |
5143 		   BCI_BUSY | SX_BUSY |
5144 		   TA_BUSY | VGT_BUSY |
5145 		   DB_BUSY | CB_BUSY |
5146 		   GDS_BUSY | SPI_BUSY |
5147 		   IA_BUSY | IA_BUSY_NO_DMA))
5148 		reset_mask |= RADEON_RESET_GFX;
5149 
5150 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5151 		reset_mask |= RADEON_RESET_CP;
5152 
5153 	/* GRBM_STATUS2 */
5154 	tmp = RREG32(GRBM_STATUS2);
5155 	if (tmp & RLC_BUSY)
5156 		reset_mask |= RADEON_RESET_RLC;
5157 
5158 	/* SDMA0_STATUS_REG */
5159 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5160 	if (!(tmp & SDMA_IDLE))
5161 		reset_mask |= RADEON_RESET_DMA;
5162 
5163 	/* SDMA1_STATUS_REG */
5164 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5165 	if (!(tmp & SDMA_IDLE))
5166 		reset_mask |= RADEON_RESET_DMA1;
5167 
5168 	/* SRBM_STATUS2 */
5169 	tmp = RREG32(SRBM_STATUS2);
5170 	if (tmp & SDMA_BUSY)
5171 		reset_mask |= RADEON_RESET_DMA;
5172 
5173 	if (tmp & SDMA1_BUSY)
5174 		reset_mask |= RADEON_RESET_DMA1;
5175 
5176 	/* SRBM_STATUS */
5177 	tmp = RREG32(SRBM_STATUS);
5178 
5179 	if (tmp & IH_BUSY)
5180 		reset_mask |= RADEON_RESET_IH;
5181 
5182 	if (tmp & SEM_BUSY)
5183 		reset_mask |= RADEON_RESET_SEM;
5184 
5185 	if (tmp & GRBM_RQ_PENDING)
5186 		reset_mask |= RADEON_RESET_GRBM;
5187 
5188 	if (tmp & VMC_BUSY)
5189 		reset_mask |= RADEON_RESET_VMC;
5190 
5191 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5192 		   MCC_BUSY | MCD_BUSY))
5193 		reset_mask |= RADEON_RESET_MC;
5194 
5195 	if (evergreen_is_display_hung(rdev))
5196 		reset_mask |= RADEON_RESET_DISPLAY;
5197 
5198 	/* Skip MC reset as it's mostly likely not hung, just busy */
5199 	if (reset_mask & RADEON_RESET_MC) {
5200 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5201 		reset_mask &= ~RADEON_RESET_MC;
5202 	}
5203 
5204 	return reset_mask;
5205 }
5206 
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 * Does nothing when @reset_mask is 0.  Otherwise the status registers
 * are logged, CG/PG and the RLC are shut down, the CP/MEC (and the
 * requested SDMA engines) are halted and the MC is stopped; the
 * requested blocks are then pulsed through GRBM_SOFT_RESET and
 * SRBM_SOFT_RESET, the MC is resumed and the status registers are
 * logged once more.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* only halt the SDMA engines that are about to be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* flags into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* the MC reset bit is never requested on IGP parts */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/*
	 * Pulse the GRBM reset bits: set them, wait 50us, clear them.
	 * NOTE(review): the read-backs after each write presumably post
	 * the write to the hardware -- confirm.
	 */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same set / wait / clear pulse for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5337 
/*
 * GMCON register values captured by kv_save_regs_for_reset() and written
 * back by kv_restore_regs_for_reset() around a pci config reset.
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5343 
5344 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5345 				   struct kv_reset_save_regs *save)
5346 {
5347 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5348 	save->gmcon_misc = RREG32(GMCON_MISC);
5349 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5350 
5351 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5352 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5353 						STCTRL_STUTTER_EN));
5354 }
5355 
5356 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5357 				      struct kv_reset_save_regs *save)
5358 {
5359 	int i;
5360 
5361 	WREG32(GMCON_PGFSM_WRITE, 0);
5362 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5363 
5364 	for (i = 0; i < 5; i++)
5365 		WREG32(GMCON_PGFSM_WRITE, 0);
5366 
5367 	WREG32(GMCON_PGFSM_WRITE, 0);
5368 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5369 
5370 	for (i = 0; i < 5; i++)
5371 		WREG32(GMCON_PGFSM_WRITE, 0);
5372 
5373 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5374 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5375 
5376 	for (i = 0; i < 5; i++)
5377 		WREG32(GMCON_PGFSM_WRITE, 0);
5378 
5379 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5380 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5381 
5382 	for (i = 0; i < 5; i++)
5383 		WREG32(GMCON_PGFSM_WRITE, 0);
5384 
5385 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5386 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5387 
5388 	for (i = 0; i < 5; i++)
5389 		WREG32(GMCON_PGFSM_WRITE, 0);
5390 
5391 	WREG32(GMCON_PGFSM_WRITE, 0);
5392 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5393 
5394 	for (i = 0; i < 5; i++)
5395 		WREG32(GMCON_PGFSM_WRITE, 0);
5396 
5397 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5398 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5399 
5400 	for (i = 0; i < 5; i++)
5401 		WREG32(GMCON_PGFSM_WRITE, 0);
5402 
5403 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5404 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5405 
5406 	for (i = 0; i < 5; i++)
5407 		WREG32(GMCON_PGFSM_WRITE, 0);
5408 
5409 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5410 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5411 
5412 	for (i = 0; i < 5; i++)
5413 		WREG32(GMCON_PGFSM_WRITE, 0);
5414 
5415 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5416 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5417 
5418 	for (i = 0; i < 5; i++)
5419 		WREG32(GMCON_PGFSM_WRITE, 0);
5420 
5421 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5422 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5423 
5424 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5425 	WREG32(GMCON_MISC, save->gmcon_misc);
5426 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5427 }
5428 
/*
 * cik_gpu_pci_config_reset - reset the asic through PCI config space
 *
 * Quiesces the GPU (CG/PG off, CP/MEC and both SDMA engines halted, RLC
 * stopped, MC stopped), clears PCI bus mastering and issues
 * radeon_pci_config_reset().  It then polls CONFIG_MEMSIZE for up to
 * rdev->usec_timeout microseconds until it no longer reads 0xffffffff.
 * On IGP parts the GMCON registers are saved before and restored after
 * the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	/* only filled in (and later consumed) on IGP parts */
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* a value other than all ones ends the wait */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5491 
5492 /**
5493  * cik_asic_reset - soft reset GPU
5494  *
5495  * @rdev: radeon_device pointer
5496  *
5497  * Look up which blocks are hung and attempt
5498  * to reset them.
5499  * Returns 0 for success.
5500  */
5501 int cik_asic_reset(struct radeon_device *rdev)
5502 {
5503 	u32 reset_mask;
5504 
5505 	reset_mask = cik_gpu_check_soft_reset(rdev);
5506 
5507 	if (reset_mask)
5508 		r600_set_bios_scratch_engine_hung(rdev, true);
5509 
5510 	/* try soft reset */
5511 	cik_gpu_soft_reset(rdev, reset_mask);
5512 
5513 	reset_mask = cik_gpu_check_soft_reset(rdev);
5514 
5515 	/* try pci config reset */
5516 	if (reset_mask && radeon_hard_reset)
5517 		cik_gpu_pci_config_reset(rdev);
5518 
5519 	reset_mask = cik_gpu_check_soft_reset(rdev);
5520 
5521 	if (!reset_mask)
5522 		r600_set_bios_scratch_engine_hung(rdev, false);
5523 
5524 	return 0;
5525 }
5526 
5527 /**
5528  * cik_gfx_is_lockup - check if the 3D engine is locked up
5529  *
5530  * @rdev: radeon_device pointer
5531  * @ring: radeon_ring structure holding ring information
5532  *
5533  * Check if the 3D engine is locked up (CIK).
5534  * Returns true if the engine is locked, false if not.
5535  */
5536 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5537 {
5538 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5539 
5540 	if (!(reset_mask & (RADEON_RESET_GFX |
5541 			    RADEON_RESET_COMPUTE |
5542 			    RADEON_RESET_CP))) {
5543 		radeon_ring_lockup_update(rdev, ring);
5544 		return false;
5545 	}
5546 	return radeon_ring_test_lockup(rdev, ring);
5547 }
5548 
5549 /* MC */
5550 /**
5551  * cik_mc_program - program the GPU memory controller
5552  *
5553  * @rdev: radeon_device pointer
5554  *
5555  * Set the location of vram, gart, and AGP in the GPU's
5556  * physical address space (CIK).
5557  */
5558 static void cik_mc_program(struct radeon_device *rdev)
5559 {
5560 	struct evergreen_mc_save save;
5561 	u32 tmp;
5562 	int i, j;
5563 
5564 	/* Initialize HDP */
5565 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5566 		WREG32((0x2c14 + j), 0x00000000);
5567 		WREG32((0x2c18 + j), 0x00000000);
5568 		WREG32((0x2c1c + j), 0x00000000);
5569 		WREG32((0x2c20 + j), 0x00000000);
5570 		WREG32((0x2c24 + j), 0x00000000);
5571 	}
5572 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5573 
5574 	evergreen_mc_stop(rdev, &save);
5575 	if (radeon_mc_wait_for_idle(rdev)) {
5576 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5577 	}
5578 	/* Lockout access through VGA aperture*/
5579 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5580 	/* Update configuration */
5581 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5582 	       rdev->mc.vram_start >> 12);
5583 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5584 	       rdev->mc.vram_end >> 12);
5585 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5586 	       rdev->vram_scratch.gpu_addr >> 12);
5587 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5588 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5589 	WREG32(MC_VM_FB_LOCATION, tmp);
5590 	/* XXX double check these! */
5591 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5592 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5593 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5594 	WREG32(MC_VM_AGP_BASE, 0);
5595 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5596 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5597 	if (radeon_mc_wait_for_idle(rdev)) {
5598 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5599 	}
5600 	evergreen_mc_resume(rdev, &save);
5601 	/* we need to own VRAM, so turn off the VGA renderer here
5602 	 * to stop it overwriting our objects */
5603 	rv515_vga_render_disable(rdev);
5604 }
5605 
5606 /**
5607  * cik_mc_init - initialize the memory controller driver params
5608  *
5609  * @rdev: radeon_device pointer
5610  *
5611  * Look up the amount of vram, vram width, and decide how to place
5612  * vram and gart within the GPU's physical address space (CIK).
5613  * Returns 0 for success.
5614  */
5615 static int cik_mc_init(struct radeon_device *rdev)
5616 {
5617 	u32 tmp;
5618 	int chansize, numchan;
5619 
5620 	/* Get VRAM informations */
5621 	rdev->mc.vram_is_ddr = true;
5622 	tmp = RREG32(MC_ARB_RAMCFG);
5623 	if (tmp & CHANSIZE_MASK) {
5624 		chansize = 64;
5625 	} else {
5626 		chansize = 32;
5627 	}
5628 	tmp = RREG32(MC_SHARED_CHMAP);
5629 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5630 	case 0:
5631 	default:
5632 		numchan = 1;
5633 		break;
5634 	case 1:
5635 		numchan = 2;
5636 		break;
5637 	case 2:
5638 		numchan = 4;
5639 		break;
5640 	case 3:
5641 		numchan = 8;
5642 		break;
5643 	case 4:
5644 		numchan = 3;
5645 		break;
5646 	case 5:
5647 		numchan = 6;
5648 		break;
5649 	case 6:
5650 		numchan = 10;
5651 		break;
5652 	case 7:
5653 		numchan = 12;
5654 		break;
5655 	case 8:
5656 		numchan = 16;
5657 		break;
5658 	}
5659 	rdev->mc.vram_width = numchan * chansize;
5660 	/* Could aper size report 0 ? */
5661 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5662 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5663 	/* size in MB on si */
5664 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5665 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5666 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5667 	si_vram_gtt_location(rdev, &rdev->mc);
5668 	radeon_update_bandwidth_info(rdev);
5669 
5670 	return 0;
5671 }
5672 
5673 /*
5674  * GART
5675  * VMID 0 is the physical GPU addresses as used by the kernel.
5676  * VMIDs 1-15 are used for userspace clients and are handled
5677  * by the radeon vm/hsa code.
5678  */
5679 /**
5680  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5681  *
5682  * @rdev: radeon_device pointer
5683  *
5684  * Flush the TLB for the VMID 0 page table (CIK).
5685  */
5686 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5687 {
5688 	/* flush hdp cache */
5689 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5690 
5691 	/* bits 0-15 are the VM contexts0-15 */
5692 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5693 }
5694 
5695 /**
5696  * cik_pcie_gart_enable - gart enable
5697  *
5698  * @rdev: radeon_device pointer
5699  *
5700  * This sets up the TLBs, programs the page tables for VMID0,
5701  * sets up the hw for VMIDs 1-15 which are allocated on
5702  * demand, and sets up the global locations for the LDS, GDS,
5703  * and GPUVM for FSA64 clients (CIK).
5704  * Returns 0 for success, errors for failure.
5705  */
5706 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5707 {
5708 	int r, i;
5709 
5710 	if (rdev->gart.robj == NULL) {
5711 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5712 		return -EINVAL;
5713 	}
5714 	r = radeon_gart_table_vram_pin(rdev);
5715 	if (r)
5716 		return r;
5717 	/* Setup TLB control */
5718 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5719 	       (0xA << 7) |
5720 	       ENABLE_L1_TLB |
5721 	       ENABLE_L1_FRAGMENT_PROCESSING |
5722 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5723 	       ENABLE_ADVANCED_DRIVER_MODEL |
5724 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5725 	/* Setup L2 cache */
5726 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5727 	       ENABLE_L2_FRAGMENT_PROCESSING |
5728 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5729 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5730 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5731 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5732 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5733 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5734 	       BANK_SELECT(4) |
5735 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5736 	/* setup context0 */
5737 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5738 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5739 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5740 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5741 			(u32)(rdev->dummy_page.addr >> 12));
5742 	WREG32(VM_CONTEXT0_CNTL2, 0);
5743 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5744 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5745 
5746 	WREG32(0x15D4, 0);
5747 	WREG32(0x15D8, 0);
5748 	WREG32(0x15DC, 0);
5749 
5750 	/* restore context1-15 */
5751 	/* set vm size, must be a multiple of 4 */
5752 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5753 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5754 	for (i = 1; i < 16; i++) {
5755 		if (i < 8)
5756 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5757 			       rdev->vm_manager.saved_table_addr[i]);
5758 		else
5759 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5760 			       rdev->vm_manager.saved_table_addr[i]);
5761 	}
5762 
5763 	/* enable context1-15 */
5764 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5765 	       (u32)(rdev->dummy_page.addr >> 12));
5766 	WREG32(VM_CONTEXT1_CNTL2, 4);
5767 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5768 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5769 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5770 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5771 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5772 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5773 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5774 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5775 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5776 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5777 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5778 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5779 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5780 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5781 
5782 	if (rdev->family == CHIP_KAVERI) {
5783 		u32 tmp = RREG32(CHUB_CONTROL);
5784 		tmp &= ~BYPASS_VM;
5785 		WREG32(CHUB_CONTROL, tmp);
5786 	}
5787 
5788 	/* XXX SH_MEM regs */
5789 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5790 	mutex_lock(&rdev->srbm_mutex);
5791 	for (i = 0; i < 16; i++) {
5792 		cik_srbm_select(rdev, 0, 0, 0, i);
5793 		/* CP and shaders */
5794 		WREG32(SH_MEM_CONFIG, 0);
5795 		WREG32(SH_MEM_APE1_BASE, 1);
5796 		WREG32(SH_MEM_APE1_LIMIT, 0);
5797 		WREG32(SH_MEM_BASES, 0);
5798 		/* SDMA GFX */
5799 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5800 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5801 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5802 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5803 		/* XXX SDMA RLC - todo */
5804 	}
5805 	cik_srbm_select(rdev, 0, 0, 0, 0);
5806 	mutex_unlock(&rdev->srbm_mutex);
5807 
5808 	cik_pcie_gart_tlb_flush(rdev);
5809 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5810 		 (unsigned)(rdev->mc.gtt_size >> 20),
5811 		 (unsigned long long)rdev->gart.table_addr);
5812 	rdev->gart.ready = true;
5813 	return 0;
5814 }
5815 
5816 /**
5817  * cik_pcie_gart_disable - gart disable
5818  *
5819  * @rdev: radeon_device pointer
5820  *
5821  * This disables all VM page table (CIK).
5822  */
5823 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5824 {
5825 	unsigned i;
5826 
5827 	for (i = 1; i < 16; ++i) {
5828 		uint32_t reg;
5829 		if (i < 8)
5830 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5831 		else
5832 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5833 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5834 	}
5835 
5836 	/* Disable all tables */
5837 	WREG32(VM_CONTEXT0_CNTL, 0);
5838 	WREG32(VM_CONTEXT1_CNTL, 0);
5839 	/* Setup TLB control */
5840 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5841 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5842 	/* Setup L2 cache */
5843 	WREG32(VM_L2_CNTL,
5844 	       ENABLE_L2_FRAGMENT_PROCESSING |
5845 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5846 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5847 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5848 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5849 	WREG32(VM_L2_CNTL2, 0);
5850 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5851 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5852 	radeon_gart_table_vram_unpin(rdev);
5853 }
5854 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, then frees the GART table and the common
 * GART state (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable the hw first */
	cik_pcie_gart_disable(rdev);

	/* then release the table and the driver-side GART state */
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5868 
5869 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so there is nothing to parse here (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* neither argument is inspected */
	return 0;
}
5882 
5883 /*
5884  * vm
5885  * VMID 0 is the physical GPU addresses as used by the kernel.
5886  * VMIDs 1-15 are used for userspace clients and are handled
5887  * by the radeon vm/hsa code.
5888  */
5889 /**
5890  * cik_vm_init - cik vm init callback
5891  *
5892  * @rdev: radeon_device pointer
5893  *
5894  * Inits cik specific vm parameters (number of VMs, base of vram for
5895  * VMIDs 1-15) (CIK).
5896  * Returns 0 for success.
5897  */
5898 int cik_vm_init(struct radeon_device *rdev)
5899 {
5900 	/* number of VMs */
5901 	rdev->vm_manager.nvm = 16;
5902 	/* base offset of vram pages */
5903 	if (rdev->flags & RADEON_IS_IGP) {
5904 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5905 		tmp <<= 22;
5906 		rdev->vm_manager.vram_base_offset = tmp;
5907 	} else
5908 		rdev->vm_manager.vram_base_offset = 0;
5909 
5910 	return 0;
5911 }
5912 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Counterpart of cik_vm_init(); there is no asic specific VM setup
 * to tear down (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty */
}
5923 
5924 /**
5925  * cik_vm_decode_fault - print human readable fault info
5926  *
5927  * @rdev: radeon_device pointer
5928  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5929  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5930  *
5931  * Print human readable fault information (CIK).
5932  */
5933 static void cik_vm_decode_fault(struct radeon_device *rdev,
5934 				u32 status, u32 addr, u32 mc_client)
5935 {
5936 	u32 mc_id;
5937 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5938 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5939 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5940 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5941 
5942 	if (rdev->family == CHIP_HAWAII)
5943 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5944 	else
5945 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5946 
5947 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5948 	       protections, vmid, addr,
5949 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5950 	       block, mc_client, mc_id);
5951 }
5952 
5953 /**
5954  * cik_vm_flush - cik vm flush using the CP
5955  *
5956  * @rdev: radeon_device pointer
5957  *
5958  * Update the page table base and flush the VM TLB
5959  * using the CP (CIK).
5960  */
5961 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5962 {
5963 	struct radeon_ring *ring = &rdev->ring[ridx];
5964 	int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
5965 
5966 	if (vm == NULL)
5967 		return;
5968 
5969 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5970 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5971 				 WRITE_DATA_DST_SEL(0)));
5972 	if (vm->id < 8) {
5973 		radeon_ring_write(ring,
5974 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5975 	} else {
5976 		radeon_ring_write(ring,
5977 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5978 	}
5979 	radeon_ring_write(ring, 0);
5980 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5981 
5982 	/* update SH_MEM_* regs */
5983 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5984 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5985 				 WRITE_DATA_DST_SEL(0)));
5986 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5987 	radeon_ring_write(ring, 0);
5988 	radeon_ring_write(ring, VMID(vm->id));
5989 
5990 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5991 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5992 				 WRITE_DATA_DST_SEL(0)));
5993 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5994 	radeon_ring_write(ring, 0);
5995 
5996 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5997 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5998 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5999 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6000 
6001 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6002 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6003 				 WRITE_DATA_DST_SEL(0)));
6004 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6005 	radeon_ring_write(ring, 0);
6006 	radeon_ring_write(ring, VMID(0));
6007 
6008 	/* HDP flush */
6009 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
6010 
6011 	/* bits 0-15 are the VM contexts0-15 */
6012 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6013 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6014 				 WRITE_DATA_DST_SEL(0)));
6015 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6016 	radeon_ring_write(ring, 0);
6017 	radeon_ring_write(ring, 1 << vm->id);
6018 
6019 	/* compute doesn't have PFP */
6020 	if (usepfp) {
6021 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6022 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6023 		radeon_ring_write(ring, 0x0);
6024 	}
6025 }
6026 
6027 /*
6028  * RLC
6029  * The RLC is a multi-purpose microengine that handles a
6030  * variety of functions, the most important of which is
6031  * the interrupt controller.
6032  */
6033 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6034 					  bool enable)
6035 {
6036 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6037 
6038 	if (enable)
6039 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6040 	else
6041 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6042 	WREG32(CP_INT_CNTL_RING0, tmp);
6043 }
6044 
6045 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6046 {
6047 	u32 tmp;
6048 
6049 	tmp = RREG32(RLC_LB_CNTL);
6050 	if (enable)
6051 		tmp |= LOAD_BALANCE_ENABLE;
6052 	else
6053 		tmp &= ~LOAD_BALANCE_ENABLE;
6054 	WREG32(RLC_LB_CNTL, tmp);
6055 }
6056 
6057 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6058 {
6059 	u32 i, j, k;
6060 	u32 mask;
6061 
6062 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6063 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6064 			cik_select_se_sh(rdev, i, j);
6065 			for (k = 0; k < rdev->usec_timeout; k++) {
6066 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6067 					break;
6068 				udelay(1);
6069 			}
6070 		}
6071 	}
6072 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6073 
6074 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6075 	for (k = 0; k < rdev->usec_timeout; k++) {
6076 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6077 			break;
6078 		udelay(1);
6079 	}
6080 }
6081 
6082 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6083 {
6084 	u32 tmp;
6085 
6086 	tmp = RREG32(RLC_CNTL);
6087 	if (tmp != rlc)
6088 		WREG32(RLC_CNTL, rlc);
6089 }
6090 
6091 static u32 cik_halt_rlc(struct radeon_device *rdev)
6092 {
6093 	u32 data, orig;
6094 
6095 	orig = data = RREG32(RLC_CNTL);
6096 
6097 	if (data & RLC_ENABLE) {
6098 		u32 i;
6099 
6100 		data &= ~RLC_ENABLE;
6101 		WREG32(RLC_CNTL, data);
6102 
6103 		for (i = 0; i < rdev->usec_timeout; i++) {
6104 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6105 				break;
6106 			udelay(1);
6107 		}
6108 
6109 		cik_wait_for_rlc_serdes(rdev);
6110 	}
6111 
6112 	return orig;
6113 }
6114 
6115 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6116 {
6117 	u32 tmp, i, mask;
6118 
6119 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6120 	WREG32(RLC_GPR_REG2, tmp);
6121 
6122 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6123 	for (i = 0; i < rdev->usec_timeout; i++) {
6124 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6125 			break;
6126 		udelay(1);
6127 	}
6128 
6129 	for (i = 0; i < rdev->usec_timeout; i++) {
6130 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6131 			break;
6132 		udelay(1);
6133 	}
6134 }
6135 
6136 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6137 {
6138 	u32 tmp;
6139 
6140 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6141 	WREG32(RLC_GPR_REG2, tmp);
6142 }
6143 
6144 /**
6145  * cik_rlc_stop - stop the RLC ME
6146  *
6147  * @rdev: radeon_device pointer
6148  *
6149  * Halt the RLC ME (MicroEngine) (CIK).
6150  */
6151 static void cik_rlc_stop(struct radeon_device *rdev)
6152 {
6153 	WREG32(RLC_CNTL, 0);
6154 
6155 	cik_enable_gui_idle_interrupt(rdev, false);
6156 
6157 	cik_wait_for_rlc_serdes(rdev);
6158 }
6159 
6160 /**
6161  * cik_rlc_start - start the RLC ME
6162  *
6163  * @rdev: radeon_device pointer
6164  *
6165  * Unhalt the RLC ME (MicroEngine) (CIK).
6166  */
6167 static void cik_rlc_start(struct radeon_device *rdev)
6168 {
6169 	WREG32(RLC_CNTL, RLC_ENABLE);
6170 
6171 	cik_enable_gui_idle_interrupt(rdev, true);
6172 
6173 	udelay(50);
6174 }
6175 
6176 /**
6177  * cik_rlc_resume - setup the RLC hw
6178  *
6179  * @rdev: radeon_device pointer
6180  *
6181  * Initialize the RLC registers, load the ucode,
6182  * and start the RLC (CIK).
6183  * Returns 0 for success, -EINVAL if the ucode is not available.
6184  */
6185 static int cik_rlc_resume(struct radeon_device *rdev)
6186 {
6187 	u32 i, size, tmp;
6188 
6189 	if (!rdev->rlc_fw)
6190 		return -EINVAL;
6191 
6192 	cik_rlc_stop(rdev);
6193 
6194 	/* disable CG */
6195 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6196 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6197 
6198 	si_rlc_reset(rdev);
6199 
6200 	cik_init_pg(rdev);
6201 
6202 	cik_init_cg(rdev);
6203 
6204 	WREG32(RLC_LB_CNTR_INIT, 0);
6205 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6206 
6207 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6208 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6209 	WREG32(RLC_LB_PARAMS, 0x00600408);
6210 	WREG32(RLC_LB_CNTL, 0x80000004);
6211 
6212 	WREG32(RLC_MC_CNTL, 0);
6213 	WREG32(RLC_UCODE_CNTL, 0);
6214 
6215 	if (rdev->new_fw) {
6216 		const struct rlc_firmware_header_v1_0 *hdr =
6217 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6218 		const __le32 *fw_data = (const __le32 *)
6219 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6220 
6221 		radeon_ucode_print_rlc_hdr(&hdr->header);
6222 
6223 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6224 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6225 		for (i = 0; i < size; i++)
6226 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6227 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6228 	} else {
6229 		const __be32 *fw_data;
6230 
6231 		switch (rdev->family) {
6232 		case CHIP_BONAIRE:
6233 		case CHIP_HAWAII:
6234 		default:
6235 			size = BONAIRE_RLC_UCODE_SIZE;
6236 			break;
6237 		case CHIP_KAVERI:
6238 			size = KV_RLC_UCODE_SIZE;
6239 			break;
6240 		case CHIP_KABINI:
6241 			size = KB_RLC_UCODE_SIZE;
6242 			break;
6243 		case CHIP_MULLINS:
6244 			size = ML_RLC_UCODE_SIZE;
6245 			break;
6246 		}
6247 
6248 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6249 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6250 		for (i = 0; i < size; i++)
6251 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6252 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6253 	}
6254 
6255 	/* XXX - find out what chips support lbpw */
6256 	cik_enable_lbpw(rdev, false);
6257 
6258 	if (rdev->family == CHIP_BONAIRE)
6259 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6260 
6261 	cik_rlc_start(rdev);
6262 
6263 	return 0;
6264 }
6265 
6266 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6267 {
6268 	u32 data, orig, tmp, tmp2;
6269 
6270 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6271 
6272 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6273 		cik_enable_gui_idle_interrupt(rdev, true);
6274 
6275 		tmp = cik_halt_rlc(rdev);
6276 
6277 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6278 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6279 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6280 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6281 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6282 
6283 		cik_update_rlc(rdev, tmp);
6284 
6285 		data |= CGCG_EN | CGLS_EN;
6286 	} else {
6287 		cik_enable_gui_idle_interrupt(rdev, false);
6288 
6289 		RREG32(CB_CGTT_SCLK_CTRL);
6290 		RREG32(CB_CGTT_SCLK_CTRL);
6291 		RREG32(CB_CGTT_SCLK_CTRL);
6292 		RREG32(CB_CGTT_SCLK_CTRL);
6293 
6294 		data &= ~(CGCG_EN | CGLS_EN);
6295 	}
6296 
6297 	if (orig != data)
6298 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6299 
6300 }
6301 
6302 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6303 {
6304 	u32 data, orig, tmp = 0;
6305 
6306 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6307 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6308 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6309 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6310 				data |= CP_MEM_LS_EN;
6311 				if (orig != data)
6312 					WREG32(CP_MEM_SLP_CNTL, data);
6313 			}
6314 		}
6315 
6316 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6317 		data &= 0xfffffffd;
6318 		if (orig != data)
6319 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6320 
6321 		tmp = cik_halt_rlc(rdev);
6322 
6323 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6324 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6325 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6326 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6327 		WREG32(RLC_SERDES_WR_CTRL, data);
6328 
6329 		cik_update_rlc(rdev, tmp);
6330 
6331 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6332 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6333 			data &= ~SM_MODE_MASK;
6334 			data |= SM_MODE(0x2);
6335 			data |= SM_MODE_ENABLE;
6336 			data &= ~CGTS_OVERRIDE;
6337 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6338 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6339 				data &= ~CGTS_LS_OVERRIDE;
6340 			data &= ~ON_MONITOR_ADD_MASK;
6341 			data |= ON_MONITOR_ADD_EN;
6342 			data |= ON_MONITOR_ADD(0x96);
6343 			if (orig != data)
6344 				WREG32(CGTS_SM_CTRL_REG, data);
6345 		}
6346 	} else {
6347 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6348 		data |= 0x00000002;
6349 		if (orig != data)
6350 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6351 
6352 		data = RREG32(RLC_MEM_SLP_CNTL);
6353 		if (data & RLC_MEM_LS_EN) {
6354 			data &= ~RLC_MEM_LS_EN;
6355 			WREG32(RLC_MEM_SLP_CNTL, data);
6356 		}
6357 
6358 		data = RREG32(CP_MEM_SLP_CNTL);
6359 		if (data & CP_MEM_LS_EN) {
6360 			data &= ~CP_MEM_LS_EN;
6361 			WREG32(CP_MEM_SLP_CNTL, data);
6362 		}
6363 
6364 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6365 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6366 		if (orig != data)
6367 			WREG32(CGTS_SM_CTRL_REG, data);
6368 
6369 		tmp = cik_halt_rlc(rdev);
6370 
6371 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6372 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6373 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6374 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6375 		WREG32(RLC_SERDES_WR_CTRL, data);
6376 
6377 		cik_update_rlc(rdev, tmp);
6378 	}
6379 }
6380 
6381 static const u32 mc_cg_registers[] =
6382 {
6383 	MC_HUB_MISC_HUB_CG,
6384 	MC_HUB_MISC_SIP_CG,
6385 	MC_HUB_MISC_VM_CG,
6386 	MC_XPB_CLK_GAT,
6387 	ATC_MISC_CG,
6388 	MC_CITF_MISC_WR_CG,
6389 	MC_CITF_MISC_RD_CG,
6390 	MC_CITF_MISC_VM_CG,
6391 	VM_L2_CG,
6392 };
6393 
6394 static void cik_enable_mc_ls(struct radeon_device *rdev,
6395 			     bool enable)
6396 {
6397 	int i;
6398 	u32 orig, data;
6399 
6400 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6401 		orig = data = RREG32(mc_cg_registers[i]);
6402 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6403 			data |= MC_LS_ENABLE;
6404 		else
6405 			data &= ~MC_LS_ENABLE;
6406 		if (data != orig)
6407 			WREG32(mc_cg_registers[i], data);
6408 	}
6409 }
6410 
6411 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6412 			       bool enable)
6413 {
6414 	int i;
6415 	u32 orig, data;
6416 
6417 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6418 		orig = data = RREG32(mc_cg_registers[i]);
6419 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6420 			data |= MC_CG_ENABLE;
6421 		else
6422 			data &= ~MC_CG_ENABLE;
6423 		if (data != orig)
6424 			WREG32(mc_cg_registers[i], data);
6425 	}
6426 }
6427 
6428 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6429 				 bool enable)
6430 {
6431 	u32 orig, data;
6432 
6433 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6434 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6435 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6436 	} else {
6437 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6438 		data |= 0xff000000;
6439 		if (data != orig)
6440 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6441 
6442 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6443 		data |= 0xff000000;
6444 		if (data != orig)
6445 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6446 	}
6447 }
6448 
6449 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6450 				 bool enable)
6451 {
6452 	u32 orig, data;
6453 
6454 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6455 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6456 		data |= 0x100;
6457 		if (orig != data)
6458 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6459 
6460 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6461 		data |= 0x100;
6462 		if (orig != data)
6463 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6464 	} else {
6465 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6466 		data &= ~0x100;
6467 		if (orig != data)
6468 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6469 
6470 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6471 		data &= ~0x100;
6472 		if (orig != data)
6473 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6474 	}
6475 }
6476 
6477 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6478 				bool enable)
6479 {
6480 	u32 orig, data;
6481 
6482 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6483 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6484 		data = 0xfff;
6485 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6486 
6487 		orig = data = RREG32(UVD_CGC_CTRL);
6488 		data |= DCM;
6489 		if (orig != data)
6490 			WREG32(UVD_CGC_CTRL, data);
6491 	} else {
6492 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6493 		data &= ~0xfff;
6494 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6495 
6496 		orig = data = RREG32(UVD_CGC_CTRL);
6497 		data &= ~DCM;
6498 		if (orig != data)
6499 			WREG32(UVD_CGC_CTRL, data);
6500 	}
6501 }
6502 
6503 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6504 			       bool enable)
6505 {
6506 	u32 orig, data;
6507 
6508 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6509 
6510 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6511 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6512 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6513 	else
6514 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6515 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6516 
6517 	if (orig != data)
6518 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6519 }
6520 
6521 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6522 				bool enable)
6523 {
6524 	u32 orig, data;
6525 
6526 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6527 
6528 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6529 		data &= ~CLOCK_GATING_DIS;
6530 	else
6531 		data |= CLOCK_GATING_DIS;
6532 
6533 	if (orig != data)
6534 		WREG32(HDP_HOST_PATH_CNTL, data);
6535 }
6536 
6537 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6538 			      bool enable)
6539 {
6540 	u32 orig, data;
6541 
6542 	orig = data = RREG32(HDP_MEM_POWER_LS);
6543 
6544 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6545 		data |= HDP_LS_ENABLE;
6546 	else
6547 		data &= ~HDP_LS_ENABLE;
6548 
6549 	if (orig != data)
6550 		WREG32(HDP_MEM_POWER_LS, data);
6551 }
6552 
6553 void cik_update_cg(struct radeon_device *rdev,
6554 		   u32 block, bool enable)
6555 {
6556 
6557 	if (block & RADEON_CG_BLOCK_GFX) {
6558 		cik_enable_gui_idle_interrupt(rdev, false);
6559 		/* order matters! */
6560 		if (enable) {
6561 			cik_enable_mgcg(rdev, true);
6562 			cik_enable_cgcg(rdev, true);
6563 		} else {
6564 			cik_enable_cgcg(rdev, false);
6565 			cik_enable_mgcg(rdev, false);
6566 		}
6567 		cik_enable_gui_idle_interrupt(rdev, true);
6568 	}
6569 
6570 	if (block & RADEON_CG_BLOCK_MC) {
6571 		if (!(rdev->flags & RADEON_IS_IGP)) {
6572 			cik_enable_mc_mgcg(rdev, enable);
6573 			cik_enable_mc_ls(rdev, enable);
6574 		}
6575 	}
6576 
6577 	if (block & RADEON_CG_BLOCK_SDMA) {
6578 		cik_enable_sdma_mgcg(rdev, enable);
6579 		cik_enable_sdma_mgls(rdev, enable);
6580 	}
6581 
6582 	if (block & RADEON_CG_BLOCK_BIF) {
6583 		cik_enable_bif_mgls(rdev, enable);
6584 	}
6585 
6586 	if (block & RADEON_CG_BLOCK_UVD) {
6587 		if (rdev->has_uvd)
6588 			cik_enable_uvd_mgcg(rdev, enable);
6589 	}
6590 
6591 	if (block & RADEON_CG_BLOCK_HDP) {
6592 		cik_enable_hdp_mgcg(rdev, enable);
6593 		cik_enable_hdp_ls(rdev, enable);
6594 	}
6595 
6596 	if (block & RADEON_CG_BLOCK_VCE) {
6597 		vce_v2_0_enable_mgcg(rdev, enable);
6598 	}
6599 }
6600 
6601 static void cik_init_cg(struct radeon_device *rdev)
6602 {
6603 
6604 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6605 
6606 	if (rdev->has_uvd)
6607 		si_init_uvd_internal_cg(rdev);
6608 
6609 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6610 			     RADEON_CG_BLOCK_SDMA |
6611 			     RADEON_CG_BLOCK_BIF |
6612 			     RADEON_CG_BLOCK_UVD |
6613 			     RADEON_CG_BLOCK_HDP), true);
6614 }
6615 
6616 static void cik_fini_cg(struct radeon_device *rdev)
6617 {
6618 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6619 			     RADEON_CG_BLOCK_SDMA |
6620 			     RADEON_CG_BLOCK_BIF |
6621 			     RADEON_CG_BLOCK_UVD |
6622 			     RADEON_CG_BLOCK_HDP), false);
6623 
6624 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6625 }
6626 
6627 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6628 					  bool enable)
6629 {
6630 	u32 data, orig;
6631 
6632 	orig = data = RREG32(RLC_PG_CNTL);
6633 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6634 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6635 	else
6636 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6637 	if (orig != data)
6638 		WREG32(RLC_PG_CNTL, data);
6639 }
6640 
6641 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6642 					  bool enable)
6643 {
6644 	u32 data, orig;
6645 
6646 	orig = data = RREG32(RLC_PG_CNTL);
6647 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6648 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6649 	else
6650 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6651 	if (orig != data)
6652 		WREG32(RLC_PG_CNTL, data);
6653 }
6654 
6655 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6656 {
6657 	u32 data, orig;
6658 
6659 	orig = data = RREG32(RLC_PG_CNTL);
6660 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6661 		data &= ~DISABLE_CP_PG;
6662 	else
6663 		data |= DISABLE_CP_PG;
6664 	if (orig != data)
6665 		WREG32(RLC_PG_CNTL, data);
6666 }
6667 
6668 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6669 {
6670 	u32 data, orig;
6671 
6672 	orig = data = RREG32(RLC_PG_CNTL);
6673 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6674 		data &= ~DISABLE_GDS_PG;
6675 	else
6676 		data |= DISABLE_GDS_PG;
6677 	if (orig != data)
6678 		WREG32(RLC_PG_CNTL, data);
6679 }
6680 
6681 #define CP_ME_TABLE_SIZE    96
6682 #define CP_ME_TABLE_OFFSET  2048
6683 #define CP_MEC_TABLE_OFFSET 4096
6684 
6685 void cik_init_cp_pg_table(struct radeon_device *rdev)
6686 {
6687 	volatile u32 *dst_ptr;
6688 	int me, i, max_me = 4;
6689 	u32 bo_offset = 0;
6690 	u32 table_offset, table_size;
6691 
6692 	if (rdev->family == CHIP_KAVERI)
6693 		max_me = 5;
6694 
6695 	if (rdev->rlc.cp_table_ptr == NULL)
6696 		return;
6697 
6698 	/* write the cp table buffer */
6699 	dst_ptr = rdev->rlc.cp_table_ptr;
6700 	for (me = 0; me < max_me; me++) {
6701 		if (rdev->new_fw) {
6702 			const __le32 *fw_data;
6703 			const struct gfx_firmware_header_v1_0 *hdr;
6704 
6705 			if (me == 0) {
6706 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6707 				fw_data = (const __le32 *)
6708 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6709 				table_offset = le32_to_cpu(hdr->jt_offset);
6710 				table_size = le32_to_cpu(hdr->jt_size);
6711 			} else if (me == 1) {
6712 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6713 				fw_data = (const __le32 *)
6714 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6715 				table_offset = le32_to_cpu(hdr->jt_offset);
6716 				table_size = le32_to_cpu(hdr->jt_size);
6717 			} else if (me == 2) {
6718 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6719 				fw_data = (const __le32 *)
6720 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6721 				table_offset = le32_to_cpu(hdr->jt_offset);
6722 				table_size = le32_to_cpu(hdr->jt_size);
6723 			} else if (me == 3) {
6724 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6725 				fw_data = (const __le32 *)
6726 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6727 				table_offset = le32_to_cpu(hdr->jt_offset);
6728 				table_size = le32_to_cpu(hdr->jt_size);
6729 			} else {
6730 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6731 				fw_data = (const __le32 *)
6732 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6733 				table_offset = le32_to_cpu(hdr->jt_offset);
6734 				table_size = le32_to_cpu(hdr->jt_size);
6735 			}
6736 
6737 			for (i = 0; i < table_size; i ++) {
6738 				dst_ptr[bo_offset + i] =
6739 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6740 			}
6741 			bo_offset += table_size;
6742 		} else {
6743 			const __be32 *fw_data;
6744 			table_size = CP_ME_TABLE_SIZE;
6745 
6746 			if (me == 0) {
6747 				fw_data = (const __be32 *)rdev->ce_fw->data;
6748 				table_offset = CP_ME_TABLE_OFFSET;
6749 			} else if (me == 1) {
6750 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6751 				table_offset = CP_ME_TABLE_OFFSET;
6752 			} else if (me == 2) {
6753 				fw_data = (const __be32 *)rdev->me_fw->data;
6754 				table_offset = CP_ME_TABLE_OFFSET;
6755 			} else {
6756 				fw_data = (const __be32 *)rdev->mec_fw->data;
6757 				table_offset = CP_MEC_TABLE_OFFSET;
6758 			}
6759 
6760 			for (i = 0; i < table_size; i ++) {
6761 				dst_ptr[bo_offset + i] =
6762 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6763 			}
6764 			bo_offset += table_size;
6765 		}
6766 	}
6767 }
6768 
6769 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6770 				bool enable)
6771 {
6772 	u32 data, orig;
6773 
6774 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6775 		orig = data = RREG32(RLC_PG_CNTL);
6776 		data |= GFX_PG_ENABLE;
6777 		if (orig != data)
6778 			WREG32(RLC_PG_CNTL, data);
6779 
6780 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6781 		data |= AUTO_PG_EN;
6782 		if (orig != data)
6783 			WREG32(RLC_AUTO_PG_CTRL, data);
6784 	} else {
6785 		orig = data = RREG32(RLC_PG_CNTL);
6786 		data &= ~GFX_PG_ENABLE;
6787 		if (orig != data)
6788 			WREG32(RLC_PG_CNTL, data);
6789 
6790 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6791 		data &= ~AUTO_PG_EN;
6792 		if (orig != data)
6793 			WREG32(RLC_AUTO_PG_CTRL, data);
6794 
6795 		data = RREG32(DB_RENDER_CONTROL);
6796 	}
6797 }
6798 
6799 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6800 {
6801 	u32 mask = 0, tmp, tmp1;
6802 	int i;
6803 
6804 	cik_select_se_sh(rdev, se, sh);
6805 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6806 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6807 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6808 
6809 	tmp &= 0xffff0000;
6810 
6811 	tmp |= tmp1;
6812 	tmp >>= 16;
6813 
6814 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6815 		mask <<= 1;
6816 		mask |= 1;
6817 	}
6818 
6819 	return (~tmp) & mask;
6820 }
6821 
6822 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6823 {
6824 	u32 i, j, k, active_cu_number = 0;
6825 	u32 mask, counter, cu_bitmap;
6826 	u32 tmp = 0;
6827 
6828 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6829 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6830 			mask = 1;
6831 			cu_bitmap = 0;
6832 			counter = 0;
6833 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6834 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6835 					if (counter < 2)
6836 						cu_bitmap |= mask;
6837 					counter ++;
6838 				}
6839 				mask <<= 1;
6840 			}
6841 
6842 			active_cu_number += counter;
6843 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6844 		}
6845 	}
6846 
6847 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6848 
6849 	tmp = RREG32(RLC_MAX_PG_CU);
6850 	tmp &= ~MAX_PU_CU_MASK;
6851 	tmp |= MAX_PU_CU(active_cu_number);
6852 	WREG32(RLC_MAX_PG_CU, tmp);
6853 }
6854 
6855 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6856 				       bool enable)
6857 {
6858 	u32 data, orig;
6859 
6860 	orig = data = RREG32(RLC_PG_CNTL);
6861 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6862 		data |= STATIC_PER_CU_PG_ENABLE;
6863 	else
6864 		data &= ~STATIC_PER_CU_PG_ENABLE;
6865 	if (orig != data)
6866 		WREG32(RLC_PG_CNTL, data);
6867 }
6868 
6869 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6870 					bool enable)
6871 {
6872 	u32 data, orig;
6873 
6874 	orig = data = RREG32(RLC_PG_CNTL);
6875 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6876 		data |= DYN_PER_CU_PG_ENABLE;
6877 	else
6878 		data &= ~DYN_PER_CU_PG_ENABLE;
6879 	if (orig != data)
6880 		WREG32(RLC_PG_CNTL, data);
6881 }
6882 
6883 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6884 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6885 
6886 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6887 {
6888 	u32 data, orig;
6889 	u32 i;
6890 
6891 	if (rdev->rlc.cs_data) {
6892 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6893 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6894 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6895 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6896 	} else {
6897 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6898 		for (i = 0; i < 3; i++)
6899 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6900 	}
6901 	if (rdev->rlc.reg_list) {
6902 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6903 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6904 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6905 	}
6906 
6907 	orig = data = RREG32(RLC_PG_CNTL);
6908 	data |= GFX_PG_SRC;
6909 	if (orig != data)
6910 		WREG32(RLC_PG_CNTL, data);
6911 
6912 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6913 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6914 
6915 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6916 	data &= ~IDLE_POLL_COUNT_MASK;
6917 	data |= IDLE_POLL_COUNT(0x60);
6918 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6919 
6920 	data = 0x10101010;
6921 	WREG32(RLC_PG_DELAY, data);
6922 
6923 	data = RREG32(RLC_PG_DELAY_2);
6924 	data &= ~0xff;
6925 	data |= 0x3;
6926 	WREG32(RLC_PG_DELAY_2, data);
6927 
6928 	data = RREG32(RLC_AUTO_PG_CTRL);
6929 	data &= ~GRBM_REG_SGIT_MASK;
6930 	data |= GRBM_REG_SGIT(0x700);
6931 	WREG32(RLC_AUTO_PG_CTRL, data);
6932 
6933 }
6934 
/*
 * Apply @enable to the three gfx power gating helpers in order:
 * cik_enable_gfx_cgpg(), cik_enable_gfx_static_mgpg() and
 * cik_enable_gfx_dynamic_mgpg().  Each helper checks the matching
 * rdev->pg_flags bit itself.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6941 
6942 u32 cik_get_csb_size(struct radeon_device *rdev)
6943 {
6944 	u32 count = 0;
6945 	const struct cs_section_def *sect = NULL;
6946 	const struct cs_extent_def *ext = NULL;
6947 
6948 	if (rdev->rlc.cs_data == NULL)
6949 		return 0;
6950 
6951 	/* begin clear state */
6952 	count += 2;
6953 	/* context control state */
6954 	count += 3;
6955 
6956 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6957 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6958 			if (sect->id == SECT_CONTEXT)
6959 				count += 2 + ext->reg_count;
6960 			else
6961 				return 0;
6962 		}
6963 	}
6964 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6965 	count += 4;
6966 	/* end clear state */
6967 	count += 2;
6968 	/* clear state */
6969 	count += 2;
6970 
6971 	return count;
6972 }
6973 
6974 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6975 {
6976 	u32 count = 0, i;
6977 	const struct cs_section_def *sect = NULL;
6978 	const struct cs_extent_def *ext = NULL;
6979 
6980 	if (rdev->rlc.cs_data == NULL)
6981 		return;
6982 	if (buffer == NULL)
6983 		return;
6984 
6985 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6986 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6987 
6988 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6989 	buffer[count++] = cpu_to_le32(0x80000000);
6990 	buffer[count++] = cpu_to_le32(0x80000000);
6991 
6992 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6993 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6994 			if (sect->id == SECT_CONTEXT) {
6995 				buffer[count++] =
6996 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6997 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6998 				for (i = 0; i < ext->reg_count; i++)
6999 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7000 			} else {
7001 				return;
7002 			}
7003 		}
7004 	}
7005 
7006 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7007 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7008 	switch (rdev->family) {
7009 	case CHIP_BONAIRE:
7010 		buffer[count++] = cpu_to_le32(0x16000012);
7011 		buffer[count++] = cpu_to_le32(0x00000000);
7012 		break;
7013 	case CHIP_KAVERI:
7014 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7015 		buffer[count++] = cpu_to_le32(0x00000000);
7016 		break;
7017 	case CHIP_KABINI:
7018 	case CHIP_MULLINS:
7019 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7020 		buffer[count++] = cpu_to_le32(0x00000000);
7021 		break;
7022 	case CHIP_HAWAII:
7023 		buffer[count++] = cpu_to_le32(0x3a00161a);
7024 		buffer[count++] = cpu_to_le32(0x0000002e);
7025 		break;
7026 	default:
7027 		buffer[count++] = cpu_to_le32(0x00000000);
7028 		buffer[count++] = cpu_to_le32(0x00000000);
7029 		break;
7030 	}
7031 
7032 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7033 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7034 
7035 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7036 	buffer[count++] = cpu_to_le32(0);
7037 }
7038 
7039 static void cik_init_pg(struct radeon_device *rdev)
7040 {
7041 	if (rdev->pg_flags) {
7042 		cik_enable_sck_slowdown_on_pu(rdev, true);
7043 		cik_enable_sck_slowdown_on_pd(rdev, true);
7044 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7045 			cik_init_gfx_cgpg(rdev);
7046 			cik_enable_cp_pg(rdev, true);
7047 			cik_enable_gds_pg(rdev, true);
7048 		}
7049 		cik_init_ao_cu_mask(rdev);
7050 		cik_update_gfx_pg(rdev, true);
7051 	}
7052 }
7053 
7054 static void cik_fini_pg(struct radeon_device *rdev)
7055 {
7056 	if (rdev->pg_flags) {
7057 		cik_update_gfx_pg(rdev, false);
7058 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7059 			cik_enable_cp_pg(rdev, false);
7060 			cik_enable_gds_pg(rdev, false);
7061 		}
7062 	}
7063 }
7064 
7065 /*
7066  * Interrupts
7067  * Starting with r6xx, interrupts are handled via a ring buffer.
7068  * Ring buffers are areas of GPU accessible memory that the GPU
7069  * writes interrupt vectors into and the host reads vectors out of.
7070  * There is a rptr (read pointer) that determines where the
7071  * host is currently reading, and a wptr (write pointer)
7072  * which determines where the GPU has written.  When the
7073  * pointers are equal, the ring is idle.  When the GPU
7074  * writes vectors to the ring buffer, it increments the
7075  * wptr.  When there is an interrupt, the host then starts
7076  * fetching commands and processing them until the pointers are
7077  * equal again at which point it updates the rptr.
7078  */
7079 
7080 /**
7081  * cik_enable_interrupts - Enable the interrupt ring buffer
7082  *
7083  * @rdev: radeon_device pointer
7084  *
7085  * Enable the interrupt ring buffer (CIK).
7086  */
7087 static void cik_enable_interrupts(struct radeon_device *rdev)
7088 {
7089 	u32 ih_cntl = RREG32(IH_CNTL);
7090 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7091 
7092 	ih_cntl |= ENABLE_INTR;
7093 	ih_rb_cntl |= IH_RB_ENABLE;
7094 	WREG32(IH_CNTL, ih_cntl);
7095 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7096 	rdev->ih.enabled = true;
7097 }
7098 
7099 /**
7100  * cik_disable_interrupts - Disable the interrupt ring buffer
7101  *
7102  * @rdev: radeon_device pointer
7103  *
7104  * Disable the interrupt ring buffer (CIK).
7105  */
7106 static void cik_disable_interrupts(struct radeon_device *rdev)
7107 {
7108 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7109 	u32 ih_cntl = RREG32(IH_CNTL);
7110 
7111 	ih_rb_cntl &= ~IH_RB_ENABLE;
7112 	ih_cntl &= ~ENABLE_INTR;
7113 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7114 	WREG32(IH_CNTL, ih_cntl);
7115 	/* set rptr, wptr to 0 */
7116 	WREG32(IH_RB_RPTR, 0);
7117 	WREG32(IH_RB_WPTR, 0);
7118 	rdev->ih.enabled = false;
7119 	rdev->ih.rptr = 0;
7120 }
7121 
7122 /**
7123  * cik_disable_interrupt_state - Disable all interrupt sources
7124  *
7125  * @rdev: radeon_device pointer
7126  *
7127  * Clear all interrupt enable bits used by the driver (CIK).
7128  */
7129 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7130 {
7131 	u32 tmp;
7132 
7133 	/* gfx ring */
7134 	tmp = RREG32(CP_INT_CNTL_RING0) &
7135 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7136 	WREG32(CP_INT_CNTL_RING0, tmp);
7137 	/* sdma */
7138 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7139 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7140 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7141 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7142 	/* compute queues */
7143 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7144 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7145 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7146 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7147 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7148 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7149 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7150 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7151 	/* grbm */
7152 	WREG32(GRBM_INT_CNTL, 0);
7153 	/* vline/vblank, etc. */
7154 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7155 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7156 	if (rdev->num_crtc >= 4) {
7157 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7158 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7159 	}
7160 	if (rdev->num_crtc >= 6) {
7161 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7162 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7163 	}
7164 	/* pflip */
7165 	if (rdev->num_crtc >= 2) {
7166 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7167 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7168 	}
7169 	if (rdev->num_crtc >= 4) {
7170 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7171 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7172 	}
7173 	if (rdev->num_crtc >= 6) {
7174 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7175 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7176 	}
7177 
7178 	/* dac hotplug */
7179 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7180 
7181 	/* digital hotplug */
7182 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7183 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7184 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7185 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7186 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7187 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7188 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7189 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7190 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7191 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7192 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7193 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7194 
7195 }
7196 
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * disable interrupts, resume the RLC, program the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation before reporting the failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base (address >> 8) and log2 of the ring size in dwords */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	/* ret is 0 here: both failure paths returned early */
	return ret;
}
7278 
7279 /**
7280  * cik_irq_set - enable/disable interrupt sources
7281  *
7282  * @rdev: radeon_device pointer
7283  *
7284  * Enable interrupt sources on the GPU (vblanks, hpd,
7285  * etc.) (CIK).
7286  * Returns 0 for success, errors for failure.
7287  */
7288 int cik_irq_set(struct radeon_device *rdev)
7289 {
7290 	u32 cp_int_cntl;
7291 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7292 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7293 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7294 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7295 	u32 grbm_int_cntl = 0;
7296 	u32 dma_cntl, dma_cntl1;
7297 	u32 thermal_int;
7298 
7299 	if (!rdev->irq.installed) {
7300 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7301 		return -EINVAL;
7302 	}
7303 	/* don't enable anything if the ih is disabled */
7304 	if (!rdev->ih.enabled) {
7305 		cik_disable_interrupts(rdev);
7306 		/* force the active interrupt state to all disabled */
7307 		cik_disable_interrupt_state(rdev);
7308 		return 0;
7309 	}
7310 
7311 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7312 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7313 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7314 
7315 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7316 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7317 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7318 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7319 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7320 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7321 
7322 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7323 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7324 
7325 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7326 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7327 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7328 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7329 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7330 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7331 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7332 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7333 
7334 	if (rdev->flags & RADEON_IS_IGP)
7335 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7336 			~(THERM_INTH_MASK | THERM_INTL_MASK);
7337 	else
7338 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7339 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7340 
7341 	/* enable CP interrupts on all rings */
7342 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7343 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7344 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7345 	}
7346 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7347 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7348 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7349 		if (ring->me == 1) {
7350 			switch (ring->pipe) {
7351 			case 0:
7352 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7353 				break;
7354 			case 1:
7355 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7356 				break;
7357 			case 2:
7358 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7359 				break;
7360 			case 3:
7361 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7362 				break;
7363 			default:
7364 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7365 				break;
7366 			}
7367 		} else if (ring->me == 2) {
7368 			switch (ring->pipe) {
7369 			case 0:
7370 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7371 				break;
7372 			case 1:
7373 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7374 				break;
7375 			case 2:
7376 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7377 				break;
7378 			case 3:
7379 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7380 				break;
7381 			default:
7382 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7383 				break;
7384 			}
7385 		} else {
7386 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7387 		}
7388 	}
7389 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7390 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7391 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7392 		if (ring->me == 1) {
7393 			switch (ring->pipe) {
7394 			case 0:
7395 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7396 				break;
7397 			case 1:
7398 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7399 				break;
7400 			case 2:
7401 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7402 				break;
7403 			case 3:
7404 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7405 				break;
7406 			default:
7407 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7408 				break;
7409 			}
7410 		} else if (ring->me == 2) {
7411 			switch (ring->pipe) {
7412 			case 0:
7413 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7414 				break;
7415 			case 1:
7416 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7417 				break;
7418 			case 2:
7419 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7420 				break;
7421 			case 3:
7422 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7423 				break;
7424 			default:
7425 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7426 				break;
7427 			}
7428 		} else {
7429 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7430 		}
7431 	}
7432 
7433 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7434 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7435 		dma_cntl |= TRAP_ENABLE;
7436 	}
7437 
7438 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7439 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7440 		dma_cntl1 |= TRAP_ENABLE;
7441 	}
7442 
7443 	if (rdev->irq.crtc_vblank_int[0] ||
7444 	    atomic_read(&rdev->irq.pflip[0])) {
7445 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7446 		crtc1 |= VBLANK_INTERRUPT_MASK;
7447 	}
7448 	if (rdev->irq.crtc_vblank_int[1] ||
7449 	    atomic_read(&rdev->irq.pflip[1])) {
7450 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7451 		crtc2 |= VBLANK_INTERRUPT_MASK;
7452 	}
7453 	if (rdev->irq.crtc_vblank_int[2] ||
7454 	    atomic_read(&rdev->irq.pflip[2])) {
7455 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7456 		crtc3 |= VBLANK_INTERRUPT_MASK;
7457 	}
7458 	if (rdev->irq.crtc_vblank_int[3] ||
7459 	    atomic_read(&rdev->irq.pflip[3])) {
7460 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7461 		crtc4 |= VBLANK_INTERRUPT_MASK;
7462 	}
7463 	if (rdev->irq.crtc_vblank_int[4] ||
7464 	    atomic_read(&rdev->irq.pflip[4])) {
7465 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7466 		crtc5 |= VBLANK_INTERRUPT_MASK;
7467 	}
7468 	if (rdev->irq.crtc_vblank_int[5] ||
7469 	    atomic_read(&rdev->irq.pflip[5])) {
7470 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7471 		crtc6 |= VBLANK_INTERRUPT_MASK;
7472 	}
7473 	if (rdev->irq.hpd[0]) {
7474 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7475 		hpd1 |= DC_HPDx_INT_EN;
7476 	}
7477 	if (rdev->irq.hpd[1]) {
7478 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7479 		hpd2 |= DC_HPDx_INT_EN;
7480 	}
7481 	if (rdev->irq.hpd[2]) {
7482 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7483 		hpd3 |= DC_HPDx_INT_EN;
7484 	}
7485 	if (rdev->irq.hpd[3]) {
7486 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7487 		hpd4 |= DC_HPDx_INT_EN;
7488 	}
7489 	if (rdev->irq.hpd[4]) {
7490 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7491 		hpd5 |= DC_HPDx_INT_EN;
7492 	}
7493 	if (rdev->irq.hpd[5]) {
7494 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7495 		hpd6 |= DC_HPDx_INT_EN;
7496 	}
7497 
7498 	if (rdev->irq.dpm_thermal) {
7499 		DRM_DEBUG("dpm thermal\n");
7500 		if (rdev->flags & RADEON_IS_IGP)
7501 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7502 		else
7503 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7504 	}
7505 
7506 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7507 
7508 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7509 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7510 
7511 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7512 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7513 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7514 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7515 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7516 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7517 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7518 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7519 
7520 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7521 
7522 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7523 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7524 	if (rdev->num_crtc >= 4) {
7525 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7526 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7527 	}
7528 	if (rdev->num_crtc >= 6) {
7529 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7530 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7531 	}
7532 
7533 	if (rdev->num_crtc >= 2) {
7534 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7535 		       GRPH_PFLIP_INT_MASK);
7536 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7537 		       GRPH_PFLIP_INT_MASK);
7538 	}
7539 	if (rdev->num_crtc >= 4) {
7540 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7541 		       GRPH_PFLIP_INT_MASK);
7542 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7543 		       GRPH_PFLIP_INT_MASK);
7544 	}
7545 	if (rdev->num_crtc >= 6) {
7546 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7547 		       GRPH_PFLIP_INT_MASK);
7548 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7549 		       GRPH_PFLIP_INT_MASK);
7550 	}
7551 
7552 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7553 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7554 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7555 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7556 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7557 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7558 
7559 	if (rdev->flags & RADEON_IS_IGP)
7560 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7561 	else
7562 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7563 
7564 	return 0;
7565 }
7566 
7567 /**
7568  * cik_irq_ack - ack interrupt sources
7569  *
7570  * @rdev: radeon_device pointer
7571  *
7572  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
7574  * generated and do not require an explicit ack.
7575  */
7576 static inline void cik_irq_ack(struct radeon_device *rdev)
7577 {
7578 	u32 tmp;
7579 
7580 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7581 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7582 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7583 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7584 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7585 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7586 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7587 
7588 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7589 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7590 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7591 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7592 	if (rdev->num_crtc >= 4) {
7593 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7594 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7595 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7596 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7597 	}
7598 	if (rdev->num_crtc >= 6) {
7599 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7600 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7601 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7602 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7603 	}
7604 
7605 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7606 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7607 		       GRPH_PFLIP_INT_CLEAR);
7608 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7609 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7610 		       GRPH_PFLIP_INT_CLEAR);
7611 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7612 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7613 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7614 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7615 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7616 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7617 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7618 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7619 
7620 	if (rdev->num_crtc >= 4) {
7621 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7622 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7623 			       GRPH_PFLIP_INT_CLEAR);
7624 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7625 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7626 			       GRPH_PFLIP_INT_CLEAR);
7627 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7628 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7629 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7630 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7631 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7632 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7633 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7634 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7635 	}
7636 
7637 	if (rdev->num_crtc >= 6) {
7638 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7639 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7640 			       GRPH_PFLIP_INT_CLEAR);
7641 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7642 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7643 			       GRPH_PFLIP_INT_CLEAR);
7644 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7645 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7646 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7647 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7648 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7649 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7650 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7651 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7652 	}
7653 
7654 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7655 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7656 		tmp |= DC_HPDx_INT_ACK;
7657 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7658 	}
7659 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7660 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7661 		tmp |= DC_HPDx_INT_ACK;
7662 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7663 	}
7664 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7665 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7666 		tmp |= DC_HPDx_INT_ACK;
7667 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7668 	}
7669 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7670 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7671 		tmp |= DC_HPDx_INT_ACK;
7672 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7673 	}
7674 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7675 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7676 		tmp |= DC_HPDx_INT_ACK;
7677 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7678 	}
7679 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7680 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7681 		tmp |= DC_HPDx_INT_ACK;
7682 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7683 	}
7684 }
7685 
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 * Calls cik_disable_interrupts(), waits 1 ms, acks whatever is
 * still pending via cik_irq_ack() and finally calls
 * cik_disable_interrupt_state().
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7701 
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7715 
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 * Runs cik_irq_suspend() first and only then r600_ih_ring_fini().
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7730 
7731 /**
7732  * cik_get_ih_wptr - get the IH ring buffer wptr
7733  *
7734  * @rdev: radeon_device pointer
7735  *
7736  * Get the IH ring buffer wptr from either the register
7737  * or the writeback memory buffer (CIK).  Also check for
7738  * ring buffer overflow and deal with it.
7739  * Used by cik_irq_process().
7740  * Returns the value of the wptr.
7741  */
7742 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7743 {
7744 	u32 wptr, tmp;
7745 
7746 	if (rdev->wb.enabled)
7747 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7748 	else
7749 		wptr = RREG32(IH_RB_WPTR);
7750 
7751 	if (wptr & RB_OVERFLOW) {
7752 		wptr &= ~RB_OVERFLOW;
7753 		/* When a ring buffer overflow happen start parsing interrupt
7754 		 * from the last not overwritten vector (wptr + 16). Hopefully
7755 		 * this should allow us to catchup.
7756 		 */
7757 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7758 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7759 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7760 		tmp = RREG32(IH_RB_CNTL);
7761 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7762 		WREG32(IH_RB_CNTL, tmp);
7763 	}
7764 	return (wptr & rdev->ih.ptr_mask);
7765 }
7766 
7767 /*        CIK IV Ring
7768  * Each IV ring entry is 128 bits:
7769  * [7:0]    - interrupt source id
7770  * [31:8]   - reserved
7771  * [59:32]  - interrupt source data
7772  * [63:60]  - reserved
7773  * [71:64]  - RINGID
7774  *            CP:
7775  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7776  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7777  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7778  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7779  *            PIPE_ID - ME0 0=3D
7780  *                    - ME1&2 compute dispatcher (4 pipes each)
7781  *            SDMA:
7782  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7783  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7784  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7785  * [79:72]  - VMID
7786  * [95:80]  - PASID
7787  * [127:96] - reserved
7788  */
7789 /**
7790  * cik_irq_process - interrupt handler
7791  *
7792  * @rdev: radeon_device pointer
7793  *
7794  * Interrupt hander (CIK).  Walk the IH ring,
7795  * ack interrupts and schedule work to handle
7796  * interrupt events.
7797  * Returns irq process return code.
7798  */
7799 int cik_irq_process(struct radeon_device *rdev)
7800 {
7801 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7802 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7803 	u32 wptr;
7804 	u32 rptr;
7805 	u32 src_id, src_data, ring_id;
7806 	u8 me_id, pipe_id, queue_id;
7807 	u32 ring_index;
7808 	bool queue_hotplug = false;
7809 	bool queue_reset = false;
7810 	u32 addr, status, mc_client;
7811 	bool queue_thermal = false;
7812 
7813 	if (!rdev->ih.enabled || rdev->shutdown)
7814 		return IRQ_NONE;
7815 
7816 	wptr = cik_get_ih_wptr(rdev);
7817 
7818 restart_ih:
7819 	/* is somebody else already processing irqs? */
7820 	if (atomic_xchg(&rdev->ih.lock, 1))
7821 		return IRQ_NONE;
7822 
7823 	rptr = rdev->ih.rptr;
7824 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7825 
7826 	/* Order reading of wptr vs. reading of IH ring data */
7827 	rmb();
7828 
7829 	/* display interrupts */
7830 	cik_irq_ack(rdev);
7831 
7832 	while (rptr != wptr) {
7833 		/* wptr/rptr are in bytes! */
7834 		ring_index = rptr / 4;
7835 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7836 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7837 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7838 
7839 		switch (src_id) {
7840 		case 1: /* D1 vblank/vline */
7841 			switch (src_data) {
7842 			case 0: /* D1 vblank */
7843 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7844 					if (rdev->irq.crtc_vblank_int[0]) {
7845 						drm_handle_vblank(rdev->ddev, 0);
7846 						rdev->pm.vblank_sync = true;
7847 						wake_up(&rdev->irq.vblank_queue);
7848 					}
7849 					if (atomic_read(&rdev->irq.pflip[0]))
7850 						radeon_crtc_handle_vblank(rdev, 0);
7851 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7852 					DRM_DEBUG("IH: D1 vblank\n");
7853 				}
7854 				break;
7855 			case 1: /* D1 vline */
7856 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7857 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7858 					DRM_DEBUG("IH: D1 vline\n");
7859 				}
7860 				break;
7861 			default:
7862 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7863 				break;
7864 			}
7865 			break;
7866 		case 2: /* D2 vblank/vline */
7867 			switch (src_data) {
7868 			case 0: /* D2 vblank */
7869 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7870 					if (rdev->irq.crtc_vblank_int[1]) {
7871 						drm_handle_vblank(rdev->ddev, 1);
7872 						rdev->pm.vblank_sync = true;
7873 						wake_up(&rdev->irq.vblank_queue);
7874 					}
7875 					if (atomic_read(&rdev->irq.pflip[1]))
7876 						radeon_crtc_handle_vblank(rdev, 1);
7877 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7878 					DRM_DEBUG("IH: D2 vblank\n");
7879 				}
7880 				break;
7881 			case 1: /* D2 vline */
7882 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7883 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7884 					DRM_DEBUG("IH: D2 vline\n");
7885 				}
7886 				break;
7887 			default:
7888 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7889 				break;
7890 			}
7891 			break;
7892 		case 3: /* D3 vblank/vline */
7893 			switch (src_data) {
7894 			case 0: /* D3 vblank */
7895 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7896 					if (rdev->irq.crtc_vblank_int[2]) {
7897 						drm_handle_vblank(rdev->ddev, 2);
7898 						rdev->pm.vblank_sync = true;
7899 						wake_up(&rdev->irq.vblank_queue);
7900 					}
7901 					if (atomic_read(&rdev->irq.pflip[2]))
7902 						radeon_crtc_handle_vblank(rdev, 2);
7903 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7904 					DRM_DEBUG("IH: D3 vblank\n");
7905 				}
7906 				break;
7907 			case 1: /* D3 vline */
7908 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7909 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7910 					DRM_DEBUG("IH: D3 vline\n");
7911 				}
7912 				break;
7913 			default:
7914 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7915 				break;
7916 			}
7917 			break;
7918 		case 4: /* D4 vblank/vline */
7919 			switch (src_data) {
7920 			case 0: /* D4 vblank */
7921 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7922 					if (rdev->irq.crtc_vblank_int[3]) {
7923 						drm_handle_vblank(rdev->ddev, 3);
7924 						rdev->pm.vblank_sync = true;
7925 						wake_up(&rdev->irq.vblank_queue);
7926 					}
7927 					if (atomic_read(&rdev->irq.pflip[3]))
7928 						radeon_crtc_handle_vblank(rdev, 3);
7929 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7930 					DRM_DEBUG("IH: D4 vblank\n");
7931 				}
7932 				break;
7933 			case 1: /* D4 vline */
7934 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7935 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7936 					DRM_DEBUG("IH: D4 vline\n");
7937 				}
7938 				break;
7939 			default:
7940 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7941 				break;
7942 			}
7943 			break;
7944 		case 5: /* D5 vblank/vline */
7945 			switch (src_data) {
7946 			case 0: /* D5 vblank */
7947 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7948 					if (rdev->irq.crtc_vblank_int[4]) {
7949 						drm_handle_vblank(rdev->ddev, 4);
7950 						rdev->pm.vblank_sync = true;
7951 						wake_up(&rdev->irq.vblank_queue);
7952 					}
7953 					if (atomic_read(&rdev->irq.pflip[4]))
7954 						radeon_crtc_handle_vblank(rdev, 4);
7955 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7956 					DRM_DEBUG("IH: D5 vblank\n");
7957 				}
7958 				break;
7959 			case 1: /* D5 vline */
7960 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7961 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7962 					DRM_DEBUG("IH: D5 vline\n");
7963 				}
7964 				break;
7965 			default:
7966 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7967 				break;
7968 			}
7969 			break;
7970 		case 6: /* D6 vblank/vline */
7971 			switch (src_data) {
7972 			case 0: /* D6 vblank */
7973 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7974 					if (rdev->irq.crtc_vblank_int[5]) {
7975 						drm_handle_vblank(rdev->ddev, 5);
7976 						rdev->pm.vblank_sync = true;
7977 						wake_up(&rdev->irq.vblank_queue);
7978 					}
7979 					if (atomic_read(&rdev->irq.pflip[5]))
7980 						radeon_crtc_handle_vblank(rdev, 5);
7981 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7982 					DRM_DEBUG("IH: D6 vblank\n");
7983 				}
7984 				break;
7985 			case 1: /* D6 vline */
7986 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7987 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7988 					DRM_DEBUG("IH: D6 vline\n");
7989 				}
7990 				break;
7991 			default:
7992 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7993 				break;
7994 			}
7995 			break;
7996 		case 8: /* D1 page flip */
7997 		case 10: /* D2 page flip */
7998 		case 12: /* D3 page flip */
7999 		case 14: /* D4 page flip */
8000 		case 16: /* D5 page flip */
8001 		case 18: /* D6 page flip */
8002 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8003 			if (radeon_use_pflipirq > 0)
8004 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8005 			break;
8006 		case 42: /* HPD hotplug */
8007 			switch (src_data) {
8008 			case 0:
8009 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8010 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8011 					queue_hotplug = true;
8012 					DRM_DEBUG("IH: HPD1\n");
8013 				}
8014 				break;
8015 			case 1:
8016 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8017 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8018 					queue_hotplug = true;
8019 					DRM_DEBUG("IH: HPD2\n");
8020 				}
8021 				break;
8022 			case 2:
8023 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8024 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8025 					queue_hotplug = true;
8026 					DRM_DEBUG("IH: HPD3\n");
8027 				}
8028 				break;
8029 			case 3:
8030 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8031 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8032 					queue_hotplug = true;
8033 					DRM_DEBUG("IH: HPD4\n");
8034 				}
8035 				break;
8036 			case 4:
8037 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8038 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8039 					queue_hotplug = true;
8040 					DRM_DEBUG("IH: HPD5\n");
8041 				}
8042 				break;
8043 			case 5:
8044 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8045 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8046 					queue_hotplug = true;
8047 					DRM_DEBUG("IH: HPD6\n");
8048 				}
8049 				break;
8050 			default:
8051 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8052 				break;
8053 			}
8054 			break;
8055 		case 124: /* UVD */
8056 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8057 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8058 			break;
8059 		case 146:
8060 		case 147:
8061 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8062 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8063 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8064 			/* reset addr and status */
8065 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8066 			if (addr == 0x0 && status == 0x0)
8067 				break;
8068 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8069 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8070 				addr);
8071 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8072 				status);
8073 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8074 			break;
8075 		case 167: /* VCE */
8076 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8077 			switch (src_data) {
8078 			case 0:
8079 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8080 				break;
8081 			case 1:
8082 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8083 				break;
8084 			default:
8085 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8086 				break;
8087 			}
8088 			break;
8089 		case 176: /* GFX RB CP_INT */
8090 		case 177: /* GFX IB CP_INT */
8091 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8092 			break;
8093 		case 181: /* CP EOP event */
8094 			DRM_DEBUG("IH: CP EOP\n");
8095 			/* XXX check the bitfield order! */
8096 			me_id = (ring_id & 0x60) >> 5;
8097 			pipe_id = (ring_id & 0x18) >> 3;
8098 			queue_id = (ring_id & 0x7) >> 0;
8099 			switch (me_id) {
8100 			case 0:
8101 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8102 				break;
8103 			case 1:
8104 			case 2:
8105 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8106 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8107 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8108 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8109 				break;
8110 			}
8111 			break;
8112 		case 184: /* CP Privileged reg access */
8113 			DRM_ERROR("Illegal register access in command stream\n");
8114 			/* XXX check the bitfield order! */
8115 			me_id = (ring_id & 0x60) >> 5;
8116 			pipe_id = (ring_id & 0x18) >> 3;
8117 			queue_id = (ring_id & 0x7) >> 0;
8118 			switch (me_id) {
8119 			case 0:
8120 				/* This results in a full GPU reset, but all we need to do is soft
8121 				 * reset the CP for gfx
8122 				 */
8123 				queue_reset = true;
8124 				break;
8125 			case 1:
8126 				/* XXX compute */
8127 				queue_reset = true;
8128 				break;
8129 			case 2:
8130 				/* XXX compute */
8131 				queue_reset = true;
8132 				break;
8133 			}
8134 			break;
8135 		case 185: /* CP Privileged inst */
8136 			DRM_ERROR("Illegal instruction in command stream\n");
8137 			/* XXX check the bitfield order! */
8138 			me_id = (ring_id & 0x60) >> 5;
8139 			pipe_id = (ring_id & 0x18) >> 3;
8140 			queue_id = (ring_id & 0x7) >> 0;
8141 			switch (me_id) {
8142 			case 0:
8143 				/* This results in a full GPU reset, but all we need to do is soft
8144 				 * reset the CP for gfx
8145 				 */
8146 				queue_reset = true;
8147 				break;
8148 			case 1:
8149 				/* XXX compute */
8150 				queue_reset = true;
8151 				break;
8152 			case 2:
8153 				/* XXX compute */
8154 				queue_reset = true;
8155 				break;
8156 			}
8157 			break;
8158 		case 224: /* SDMA trap event */
8159 			/* XXX check the bitfield order! */
8160 			me_id = (ring_id & 0x3) >> 0;
8161 			queue_id = (ring_id & 0xc) >> 2;
8162 			DRM_DEBUG("IH: SDMA trap\n");
8163 			switch (me_id) {
8164 			case 0:
8165 				switch (queue_id) {
8166 				case 0:
8167 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8168 					break;
8169 				case 1:
8170 					/* XXX compute */
8171 					break;
8172 				case 2:
8173 					/* XXX compute */
8174 					break;
8175 				}
8176 				break;
8177 			case 1:
8178 				switch (queue_id) {
8179 				case 0:
8180 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8181 					break;
8182 				case 1:
8183 					/* XXX compute */
8184 					break;
8185 				case 2:
8186 					/* XXX compute */
8187 					break;
8188 				}
8189 				break;
8190 			}
8191 			break;
8192 		case 230: /* thermal low to high */
8193 			DRM_DEBUG("IH: thermal low to high\n");
8194 			rdev->pm.dpm.thermal.high_to_low = false;
8195 			queue_thermal = true;
8196 			break;
8197 		case 231: /* thermal high to low */
8198 			DRM_DEBUG("IH: thermal high to low\n");
8199 			rdev->pm.dpm.thermal.high_to_low = true;
8200 			queue_thermal = true;
8201 			break;
8202 		case 233: /* GUI IDLE */
8203 			DRM_DEBUG("IH: GUI idle\n");
8204 			break;
8205 		case 241: /* SDMA Privileged inst */
8206 		case 247: /* SDMA Privileged inst */
8207 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8208 			/* XXX check the bitfield order! */
8209 			me_id = (ring_id & 0x3) >> 0;
8210 			queue_id = (ring_id & 0xc) >> 2;
8211 			switch (me_id) {
8212 			case 0:
8213 				switch (queue_id) {
8214 				case 0:
8215 					queue_reset = true;
8216 					break;
8217 				case 1:
8218 					/* XXX compute */
8219 					queue_reset = true;
8220 					break;
8221 				case 2:
8222 					/* XXX compute */
8223 					queue_reset = true;
8224 					break;
8225 				}
8226 				break;
8227 			case 1:
8228 				switch (queue_id) {
8229 				case 0:
8230 					queue_reset = true;
8231 					break;
8232 				case 1:
8233 					/* XXX compute */
8234 					queue_reset = true;
8235 					break;
8236 				case 2:
8237 					/* XXX compute */
8238 					queue_reset = true;
8239 					break;
8240 				}
8241 				break;
8242 			}
8243 			break;
8244 		default:
8245 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8246 			break;
8247 		}
8248 
8249 		/* wptr/rptr are in bytes! */
8250 		rptr += 16;
8251 		rptr &= rdev->ih.ptr_mask;
8252 		WREG32(IH_RB_RPTR, rptr);
8253 	}
8254 	if (queue_hotplug)
8255 		schedule_work(&rdev->hotplug_work);
8256 	if (queue_reset) {
8257 		rdev->needs_reset = true;
8258 		wake_up_all(&rdev->fence_queue);
8259 	}
8260 	if (queue_thermal)
8261 		schedule_work(&rdev->pm.dpm.thermal.work);
8262 	rdev->ih.rptr = rptr;
8263 	atomic_set(&rdev->ih.lock, 0);
8264 
8265 	/* make sure wptr hasn't changed while processing */
8266 	wptr = cik_get_ih_wptr(rdev);
8267 	if (wptr != rptr)
8268 		goto restart_ih;
8269 
8270 	return IRQ_HANDLED;
8271 }
8272 
8273 /*
8274  * startup/shutdown callbacks
8275  */
8276 /**
8277  * cik_startup - program the asic to a functional state
8278  *
8279  * @rdev: radeon_device pointer
8280  *
8281  * Programs the asic to a functional state (CIK).
8282  * Called by cik_init() and cik_resume().
8283  * Returns 0 for success, error for failure.
8284  */
8285 static int cik_startup(struct radeon_device *rdev)
8286 {
8287 	struct radeon_ring *ring;
8288 	u32 nop;
8289 	int r;
8290 
8291 	/* enable pcie gen2/3 link */
8292 	cik_pcie_gen3_enable(rdev);
8293 	/* enable aspm */
8294 	cik_program_aspm(rdev);
8295 
8296 	/* scratch needs to be initialized before MC */
8297 	r = r600_vram_scratch_init(rdev);
8298 	if (r)
8299 		return r;
8300 
8301 	cik_mc_program(rdev);
8302 
8303 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8304 		r = ci_mc_load_microcode(rdev);
8305 		if (r) {
8306 			DRM_ERROR("Failed to load MC firmware!\n");
8307 			return r;
8308 		}
8309 	}
8310 
8311 	r = cik_pcie_gart_enable(rdev);
8312 	if (r)
8313 		return r;
8314 	cik_gpu_init(rdev);
8315 
8316 	/* allocate rlc buffers */
8317 	if (rdev->flags & RADEON_IS_IGP) {
8318 		if (rdev->family == CHIP_KAVERI) {
8319 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8320 			rdev->rlc.reg_list_size =
8321 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8322 		} else {
8323 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8324 			rdev->rlc.reg_list_size =
8325 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8326 		}
8327 	}
8328 	rdev->rlc.cs_data = ci_cs_data;
8329 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8330 	r = sumo_rlc_init(rdev);
8331 	if (r) {
8332 		DRM_ERROR("Failed to init rlc BOs!\n");
8333 		return r;
8334 	}
8335 
8336 	/* allocate wb buffer */
8337 	r = radeon_wb_init(rdev);
8338 	if (r)
8339 		return r;
8340 
8341 	/* allocate mec buffers */
8342 	r = cik_mec_init(rdev);
8343 	if (r) {
8344 		DRM_ERROR("Failed to init MEC BOs!\n");
8345 		return r;
8346 	}
8347 
8348 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8349 	if (r) {
8350 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8351 		return r;
8352 	}
8353 
8354 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8355 	if (r) {
8356 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8357 		return r;
8358 	}
8359 
8360 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8361 	if (r) {
8362 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8363 		return r;
8364 	}
8365 
8366 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8367 	if (r) {
8368 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8369 		return r;
8370 	}
8371 
8372 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8373 	if (r) {
8374 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8375 		return r;
8376 	}
8377 
8378 	r = radeon_uvd_resume(rdev);
8379 	if (!r) {
8380 		r = uvd_v4_2_resume(rdev);
8381 		if (!r) {
8382 			r = radeon_fence_driver_start_ring(rdev,
8383 							   R600_RING_TYPE_UVD_INDEX);
8384 			if (r)
8385 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8386 		}
8387 	}
8388 	if (r)
8389 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8390 
8391 	r = radeon_vce_resume(rdev);
8392 	if (!r) {
8393 		r = vce_v2_0_resume(rdev);
8394 		if (!r)
8395 			r = radeon_fence_driver_start_ring(rdev,
8396 							   TN_RING_TYPE_VCE1_INDEX);
8397 		if (!r)
8398 			r = radeon_fence_driver_start_ring(rdev,
8399 							   TN_RING_TYPE_VCE2_INDEX);
8400 	}
8401 	if (r) {
8402 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8403 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8404 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8405 	}
8406 
8407 	/* Enable IRQ */
8408 	if (!rdev->irq.installed) {
8409 		r = radeon_irq_kms_init(rdev);
8410 		if (r)
8411 			return r;
8412 	}
8413 
8414 	r = cik_irq_init(rdev);
8415 	if (r) {
8416 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8417 		radeon_irq_kms_fini(rdev);
8418 		return r;
8419 	}
8420 	cik_irq_set(rdev);
8421 
8422 	if (rdev->family == CHIP_HAWAII) {
8423 		if (rdev->new_fw)
8424 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8425 		else
8426 			nop = RADEON_CP_PACKET2;
8427 	} else {
8428 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8429 	}
8430 
8431 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8432 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8433 			     nop);
8434 	if (r)
8435 		return r;
8436 
8437 	/* set up the compute queues */
8438 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8439 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8440 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8441 			     nop);
8442 	if (r)
8443 		return r;
8444 	ring->me = 1; /* first MEC */
8445 	ring->pipe = 0; /* first pipe */
8446 	ring->queue = 0; /* first queue */
8447 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8448 
8449 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8450 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8451 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8452 			     nop);
8453 	if (r)
8454 		return r;
8455 	/* dGPU only have 1 MEC */
8456 	ring->me = 1; /* first MEC */
8457 	ring->pipe = 0; /* first pipe */
8458 	ring->queue = 1; /* second queue */
8459 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8460 
8461 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8462 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8463 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8464 	if (r)
8465 		return r;
8466 
8467 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8468 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8469 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8470 	if (r)
8471 		return r;
8472 
8473 	r = cik_cp_resume(rdev);
8474 	if (r)
8475 		return r;
8476 
8477 	r = cik_sdma_resume(rdev);
8478 	if (r)
8479 		return r;
8480 
8481 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8482 	if (ring->ring_size) {
8483 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8484 				     RADEON_CP_PACKET2);
8485 		if (!r)
8486 			r = uvd_v1_0_init(rdev);
8487 		if (r)
8488 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8489 	}
8490 
8491 	r = -ENOENT;
8492 
8493 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8494 	if (ring->ring_size)
8495 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8496 				     VCE_CMD_NO_OP);
8497 
8498 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8499 	if (ring->ring_size)
8500 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8501 				     VCE_CMD_NO_OP);
8502 
8503 	if (!r)
8504 		r = vce_v1_0_init(rdev);
8505 	else if (r != -ENOENT)
8506 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8507 
8508 	r = radeon_ib_pool_init(rdev);
8509 	if (r) {
8510 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8511 		return r;
8512 	}
8513 
8514 	r = radeon_vm_manager_init(rdev);
8515 	if (r) {
8516 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8517 		return r;
8518 	}
8519 
8520 	r = dce6_audio_init(rdev);
8521 	if (r)
8522 		return r;
8523 
8524 	return 0;
8525 }
8526 
8527 /**
8528  * cik_resume - resume the asic to a functional state
8529  *
8530  * @rdev: radeon_device pointer
8531  *
8532  * Programs the asic to a functional state (CIK).
8533  * Called at resume.
8534  * Returns 0 for success, error for failure.
8535  */
8536 int cik_resume(struct radeon_device *rdev)
8537 {
8538 	int r;
8539 
8540 	/* post card */
8541 	atom_asic_init(rdev->mode_info.atom_context);
8542 
8543 	/* init golden registers */
8544 	cik_init_golden_registers(rdev);
8545 
8546 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8547 		radeon_pm_resume(rdev);
8548 
8549 	rdev->accel_working = true;
8550 	r = cik_startup(rdev);
8551 	if (r) {
8552 		DRM_ERROR("cik startup failed on resume\n");
8553 		rdev->accel_working = false;
8554 		return r;
8555 	}
8556 
8557 	return r;
8558 
8559 }
8560 
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Power management, audio and the VM manager are shut down first, then
 * the CP and SDMA engines are disabled, UVD/VCE are suspended, power and
 * clock gating are torn down and finally interrupts, writeback and the
 * PCIE GART are disabled.
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* stop the command processor and the SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* multimedia blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* power gating, then clock gating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8587 
8588 /* Plan is to move initialization in that function and use
8589  * helper function so that radeon_device_init pretty much
8590  * do nothing more than calling asic specific function. This
8591  * should also allow to remove a bunch of callback function
8592  * like vram_info.
8593  */
8594 /**
8595  * cik_init - asic specific driver and hw init
8596  *
8597  * @rdev: radeon_device pointer
8598  *
8599  * Setup asic specific driver variables and program the hw
8600  * to a functional state (CIK).
8601  * Called at driver startup.
8602  * Returns 0 for success, errors for failure.
8603  */
8604 int cik_init(struct radeon_device *rdev)
8605 {
8606 	struct radeon_ring *ring;
8607 	int r;
8608 
8609 	/* Read BIOS */
8610 	if (!radeon_get_bios(rdev)) {
8611 		if (ASIC_IS_AVIVO(rdev))
8612 			return -EINVAL;
8613 	}
8614 	/* Must be an ATOMBIOS */
8615 	if (!rdev->is_atom_bios) {
8616 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8617 		return -EINVAL;
8618 	}
8619 	r = radeon_atombios_init(rdev);
8620 	if (r)
8621 		return r;
8622 
8623 	/* Post card if necessary */
8624 	if (!radeon_card_posted(rdev)) {
8625 		if (!rdev->bios) {
8626 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8627 			return -EINVAL;
8628 		}
8629 		DRM_INFO("GPU not posted. posting now...\n");
8630 		atom_asic_init(rdev->mode_info.atom_context);
8631 	}
8632 	/* init golden registers */
8633 	cik_init_golden_registers(rdev);
8634 	/* Initialize scratch registers */
8635 	cik_scratch_init(rdev);
8636 	/* Initialize surface registers */
8637 	radeon_surface_init(rdev);
8638 	/* Initialize clocks */
8639 	radeon_get_clock_info(rdev->ddev);
8640 
8641 	/* Fence driver */
8642 	r = radeon_fence_driver_init(rdev);
8643 	if (r)
8644 		return r;
8645 
8646 	/* initialize memory controller */
8647 	r = cik_mc_init(rdev);
8648 	if (r)
8649 		return r;
8650 	/* Memory manager */
8651 	r = radeon_bo_init(rdev);
8652 	if (r)
8653 		return r;
8654 
8655 	if (rdev->flags & RADEON_IS_IGP) {
8656 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8657 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8658 			r = cik_init_microcode(rdev);
8659 			if (r) {
8660 				DRM_ERROR("Failed to load firmware!\n");
8661 				return r;
8662 			}
8663 		}
8664 	} else {
8665 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8666 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8667 		    !rdev->mc_fw) {
8668 			r = cik_init_microcode(rdev);
8669 			if (r) {
8670 				DRM_ERROR("Failed to load firmware!\n");
8671 				return r;
8672 			}
8673 		}
8674 	}
8675 
8676 	/* Initialize power management */
8677 	radeon_pm_init(rdev);
8678 
8679 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8680 	ring->ring_obj = NULL;
8681 	r600_ring_init(rdev, ring, 1024 * 1024);
8682 
8683 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8684 	ring->ring_obj = NULL;
8685 	r600_ring_init(rdev, ring, 1024 * 1024);
8686 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8687 	if (r)
8688 		return r;
8689 
8690 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8691 	ring->ring_obj = NULL;
8692 	r600_ring_init(rdev, ring, 1024 * 1024);
8693 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8694 	if (r)
8695 		return r;
8696 
8697 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8698 	ring->ring_obj = NULL;
8699 	r600_ring_init(rdev, ring, 256 * 1024);
8700 
8701 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8702 	ring->ring_obj = NULL;
8703 	r600_ring_init(rdev, ring, 256 * 1024);
8704 
8705 	r = radeon_uvd_init(rdev);
8706 	if (!r) {
8707 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8708 		ring->ring_obj = NULL;
8709 		r600_ring_init(rdev, ring, 4096);
8710 	}
8711 
8712 	r = radeon_vce_init(rdev);
8713 	if (!r) {
8714 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8715 		ring->ring_obj = NULL;
8716 		r600_ring_init(rdev, ring, 4096);
8717 
8718 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8719 		ring->ring_obj = NULL;
8720 		r600_ring_init(rdev, ring, 4096);
8721 	}
8722 
8723 	rdev->ih.ring_obj = NULL;
8724 	r600_ih_ring_init(rdev, 64 * 1024);
8725 
8726 	r = r600_pcie_gart_init(rdev);
8727 	if (r)
8728 		return r;
8729 
8730 	rdev->accel_working = true;
8731 	r = cik_startup(rdev);
8732 	if (r) {
8733 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8734 		cik_cp_fini(rdev);
8735 		cik_sdma_fini(rdev);
8736 		cik_irq_fini(rdev);
8737 		sumo_rlc_fini(rdev);
8738 		cik_mec_fini(rdev);
8739 		radeon_wb_fini(rdev);
8740 		radeon_ib_pool_fini(rdev);
8741 		radeon_vm_manager_fini(rdev);
8742 		radeon_irq_kms_fini(rdev);
8743 		cik_pcie_gart_fini(rdev);
8744 		rdev->accel_working = false;
8745 	}
8746 
8747 	/* Don't start up if the MC ucode is missing.
8748 	 * The default clocks and voltages before the MC ucode
8749 	 * is loaded are not suffient for advanced operations.
8750 	 */
8751 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8752 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8753 		return -EINVAL;
8754 	}
8755 
8756 	return 0;
8757 }
8758 
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* command processor and SDMA engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* power gating, then clock gating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* multimedia blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* release the BIOS image and clear the now stale pointer */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8794 
8795 void dce8_program_fmt(struct drm_encoder *encoder)
8796 {
8797 	struct drm_device *dev = encoder->dev;
8798 	struct radeon_device *rdev = dev->dev_private;
8799 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8800 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8801 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8802 	int bpc = 0;
8803 	u32 tmp = 0;
8804 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8805 
8806 	if (connector) {
8807 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8808 		bpc = radeon_get_monitor_bpc(connector);
8809 		dither = radeon_connector->dither;
8810 	}
8811 
8812 	/* LVDS/eDP FMT is set up by atom */
8813 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8814 		return;
8815 
8816 	/* not needed for analog */
8817 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8818 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8819 		return;
8820 
8821 	if (bpc == 0)
8822 		return;
8823 
8824 	switch (bpc) {
8825 	case 6:
8826 		if (dither == RADEON_FMT_DITHER_ENABLE)
8827 			/* XXX sort out optimal dither settings */
8828 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8829 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8830 		else
8831 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8832 		break;
8833 	case 8:
8834 		if (dither == RADEON_FMT_DITHER_ENABLE)
8835 			/* XXX sort out optimal dither settings */
8836 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8837 				FMT_RGB_RANDOM_ENABLE |
8838 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8839 		else
8840 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8841 		break;
8842 	case 10:
8843 		if (dither == RADEON_FMT_DITHER_ENABLE)
8844 			/* XXX sort out optimal dither settings */
8845 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8846 				FMT_RGB_RANDOM_ENABLE |
8847 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8848 		else
8849 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8850 		break;
8851 	default:
8852 		/* not needed */
8853 		break;
8854 	}
8855 
8856 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8857 }
8858 
8859 /* display watermark setup */
8860 /**
8861  * dce8_line_buffer_adjust - Set up the line buffer
8862  *
8863  * @rdev: radeon_device pointer
8864  * @radeon_crtc: the selected display controller
8865  * @mode: the current display mode on the selected display
8866  * controller
8867  *
8868  * Setup up the line buffer allocation for
8869  * the selected display controller (CIK).
8870  * Returns the line buffer size in pixels.
8871  */
8872 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8873 				   struct radeon_crtc *radeon_crtc,
8874 				   struct drm_display_mode *mode)
8875 {
8876 	u32 tmp, buffer_alloc, i;
8877 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8878 	/*
8879 	 * Line Buffer Setup
8880 	 * There are 6 line buffers, one for each display controllers.
8881 	 * There are 3 partitions per LB. Select the number of partitions
8882 	 * to enable based on the display width.  For display widths larger
8883 	 * than 4096, you need use to use 2 display controllers and combine
8884 	 * them using the stereo blender.
8885 	 */
8886 	if (radeon_crtc->base.enabled && mode) {
8887 		if (mode->crtc_hdisplay < 1920) {
8888 			tmp = 1;
8889 			buffer_alloc = 2;
8890 		} else if (mode->crtc_hdisplay < 2560) {
8891 			tmp = 2;
8892 			buffer_alloc = 2;
8893 		} else if (mode->crtc_hdisplay < 4096) {
8894 			tmp = 0;
8895 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8896 		} else {
8897 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8898 			tmp = 0;
8899 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8900 		}
8901 	} else {
8902 		tmp = 1;
8903 		buffer_alloc = 0;
8904 	}
8905 
8906 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8907 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8908 
8909 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8910 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8911 	for (i = 0; i < rdev->usec_timeout; i++) {
8912 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8913 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8914 			break;
8915 		udelay(1);
8916 	}
8917 
8918 	if (radeon_crtc->base.enabled && mode) {
8919 		switch (tmp) {
8920 		case 0:
8921 		default:
8922 			return 4096 * 2;
8923 		case 1:
8924 			return 1920 * 2;
8925 		case 2:
8926 			return 2560 * 2;
8927 		}
8928 	}
8929 
8930 	/* controller not enabled, so no lb used */
8931 	return 0;
8932 }
8933 
8934 /**
8935  * cik_get_number_of_dram_channels - get the number of dram channels
8936  *
8937  * @rdev: radeon_device pointer
8938  *
8939  * Look up the number of video ram channels (CIK).
8940  * Used for display watermark bandwidth calculations
8941  * Returns the number of dram channels
8942  */
8943 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8944 {
8945 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8946 
8947 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8948 	case 0:
8949 	default:
8950 		return 1;
8951 	case 1:
8952 		return 2;
8953 	case 2:
8954 		return 4;
8955 	case 3:
8956 		return 8;
8957 	case 4:
8958 		return 3;
8959 	case 5:
8960 		return 6;
8961 	case 6:
8962 		return 10;
8963 	case 7:
8964 		return 12;
8965 	case 8:
8966 		return 16;
8967 	}
8968 }
8969 
/*
 * Inputs for the DCE8 display watermark calculations.
 * One instance describes a single display controller at one clock
 * level; dce8_program_watermarks() fills one for the high clocks and
 * one for the low clocks.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8985 
8986 /**
8987  * dce8_dram_bandwidth - get the dram bandwidth
8988  *
8989  * @wm: watermark calculation data
8990  *
8991  * Calculate the raw dram bandwidth (CIK).
8992  * Used for display watermark bandwidth calculations
8993  * Returns the dram bandwidth in MBytes/s
8994  */
8995 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8996 {
8997 	/* Calculate raw DRAM Bandwidth */
8998 	fixed20_12 dram_efficiency; /* 0.7 */
8999 	fixed20_12 yclk, dram_channels, bandwidth;
9000 	fixed20_12 a;
9001 
9002 	a.full = dfixed_const(1000);
9003 	yclk.full = dfixed_const(wm->yclk);
9004 	yclk.full = dfixed_div(yclk, a);
9005 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9006 	a.full = dfixed_const(10);
9007 	dram_efficiency.full = dfixed_const(7);
9008 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9009 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9010 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9011 
9012 	return dfixed_trunc(bandwidth);
9013 }
9014 
9015 /**
9016  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9017  *
9018  * @wm: watermark calculation data
9019  *
9020  * Calculate the dram bandwidth used for display (CIK).
9021  * Used for display watermark bandwidth calculations
9022  * Returns the dram bandwidth for display in MBytes/s
9023  */
9024 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9025 {
9026 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9027 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9028 	fixed20_12 yclk, dram_channels, bandwidth;
9029 	fixed20_12 a;
9030 
9031 	a.full = dfixed_const(1000);
9032 	yclk.full = dfixed_const(wm->yclk);
9033 	yclk.full = dfixed_div(yclk, a);
9034 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9035 	a.full = dfixed_const(10);
9036 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9037 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9038 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9039 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9040 
9041 	return dfixed_trunc(bandwidth);
9042 }
9043 
9044 /**
9045  * dce8_data_return_bandwidth - get the data return bandwidth
9046  *
9047  * @wm: watermark calculation data
9048  *
9049  * Calculate the data return bandwidth used for display (CIK).
9050  * Used for display watermark bandwidth calculations
9051  * Returns the data return bandwidth in MBytes/s
9052  */
9053 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9054 {
9055 	/* Calculate the display Data return Bandwidth */
9056 	fixed20_12 return_efficiency; /* 0.8 */
9057 	fixed20_12 sclk, bandwidth;
9058 	fixed20_12 a;
9059 
9060 	a.full = dfixed_const(1000);
9061 	sclk.full = dfixed_const(wm->sclk);
9062 	sclk.full = dfixed_div(sclk, a);
9063 	a.full = dfixed_const(10);
9064 	return_efficiency.full = dfixed_const(8);
9065 	return_efficiency.full = dfixed_div(return_efficiency, a);
9066 	a.full = dfixed_const(32);
9067 	bandwidth.full = dfixed_mul(a, sclk);
9068 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9069 
9070 	return dfixed_trunc(bandwidth);
9071 }
9072 
9073 /**
9074  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9075  *
9076  * @wm: watermark calculation data
9077  *
9078  * Calculate the dmif bandwidth used for display (CIK).
9079  * Used for display watermark bandwidth calculations
9080  * Returns the dmif bandwidth in MBytes/s
9081  */
9082 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9083 {
9084 	/* Calculate the DMIF Request Bandwidth */
9085 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9086 	fixed20_12 disp_clk, bandwidth;
9087 	fixed20_12 a, b;
9088 
9089 	a.full = dfixed_const(1000);
9090 	disp_clk.full = dfixed_const(wm->disp_clk);
9091 	disp_clk.full = dfixed_div(disp_clk, a);
9092 	a.full = dfixed_const(32);
9093 	b.full = dfixed_mul(a, disp_clk);
9094 
9095 	a.full = dfixed_const(10);
9096 	disp_clk_request_efficiency.full = dfixed_const(8);
9097 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9098 
9099 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9100 
9101 	return dfixed_trunc(bandwidth);
9102 }
9103 
9104 /**
9105  * dce8_available_bandwidth - get the min available bandwidth
9106  *
9107  * @wm: watermark calculation data
9108  *
9109  * Calculate the min available bandwidth used for display (CIK).
9110  * Used for display watermark bandwidth calculations
9111  * Returns the min available bandwidth in MBytes/s
9112  */
9113 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9114 {
9115 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9116 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9117 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9118 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9119 
9120 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9121 }
9122 
9123 /**
9124  * dce8_average_bandwidth - get the average available bandwidth
9125  *
9126  * @wm: watermark calculation data
9127  *
9128  * Calculate the average available bandwidth used for display (CIK).
9129  * Used for display watermark bandwidth calculations
9130  * Returns the average available bandwidth in MBytes/s
9131  */
9132 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9133 {
9134 	/* Calculate the display mode Average Bandwidth
9135 	 * DisplayMode should contain the source and destination dimensions,
9136 	 * timing, etc.
9137 	 */
9138 	fixed20_12 bpp;
9139 	fixed20_12 line_time;
9140 	fixed20_12 src_width;
9141 	fixed20_12 bandwidth;
9142 	fixed20_12 a;
9143 
9144 	a.full = dfixed_const(1000);
9145 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9146 	line_time.full = dfixed_div(line_time, a);
9147 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9148 	src_width.full = dfixed_const(wm->src_width);
9149 	bandwidth.full = dfixed_mul(src_width, bpp);
9150 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9151 	bandwidth.full = dfixed_div(bandwidth, line_time);
9152 
9153 	return dfixed_trunc(bandwidth);
9154 }
9155 
9156 /**
9157  * dce8_latency_watermark - get the latency watermark
9158  *
9159  * @wm: watermark calculation data
9160  *
9161  * Calculate the latency watermark (CIK).
9162  * Used for display watermark bandwidth calculations
9163  * Returns the latency watermark in ns
9164  */
9165 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9166 {
9167 	/* First calculate the latency in ns */
9168 	u32 mc_latency = 2000; /* 2000 ns. */
9169 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9170 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9171 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9172 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9173 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9174 		(wm->num_heads * cursor_line_pair_return_time);
9175 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9176 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9177 	u32 tmp, dmif_size = 12288;
9178 	fixed20_12 a, b, c;
9179 
9180 	if (wm->num_heads == 0)
9181 		return 0;
9182 
9183 	a.full = dfixed_const(2);
9184 	b.full = dfixed_const(1);
9185 	if ((wm->vsc.full > a.full) ||
9186 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9187 	    (wm->vtaps >= 5) ||
9188 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9189 		max_src_lines_per_dst_line = 4;
9190 	else
9191 		max_src_lines_per_dst_line = 2;
9192 
9193 	a.full = dfixed_const(available_bandwidth);
9194 	b.full = dfixed_const(wm->num_heads);
9195 	a.full = dfixed_div(a, b);
9196 
9197 	b.full = dfixed_const(mc_latency + 512);
9198 	c.full = dfixed_const(wm->disp_clk);
9199 	b.full = dfixed_div(b, c);
9200 
9201 	c.full = dfixed_const(dmif_size);
9202 	b.full = dfixed_div(c, b);
9203 
9204 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9205 
9206 	b.full = dfixed_const(1000);
9207 	c.full = dfixed_const(wm->disp_clk);
9208 	b.full = dfixed_div(c, b);
9209 	c.full = dfixed_const(wm->bytes_per_pixel);
9210 	b.full = dfixed_mul(b, c);
9211 
9212 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9213 
9214 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9215 	b.full = dfixed_const(1000);
9216 	c.full = dfixed_const(lb_fill_bw);
9217 	b.full = dfixed_div(c, b);
9218 	a.full = dfixed_div(a, b);
9219 	line_fill_time = dfixed_trunc(a);
9220 
9221 	if (line_fill_time < wm->active_time)
9222 		return latency;
9223 	else
9224 		return latency + (line_fill_time - wm->active_time);
9225 
9226 }
9227 
9228 /**
9229  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9230  * average and available dram bandwidth
9231  *
9232  * @wm: watermark calculation data
9233  *
9234  * Check if the display average bandwidth fits in the display
9235  * dram bandwidth (CIK).
9236  * Used for display watermark bandwidth calculations
9237  * Returns true if the display fits, false if not.
9238  */
9239 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9240 {
9241 	if (dce8_average_bandwidth(wm) <=
9242 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9243 		return true;
9244 	else
9245 		return false;
9246 }
9247 
9248 /**
9249  * dce8_average_bandwidth_vs_available_bandwidth - check
9250  * average and available bandwidth
9251  *
9252  * @wm: watermark calculation data
9253  *
9254  * Check if the display average bandwidth fits in the display
9255  * available bandwidth (CIK).
9256  * Used for display watermark bandwidth calculations
9257  * Returns true if the display fits, false if not.
9258  */
9259 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9260 {
9261 	if (dce8_average_bandwidth(wm) <=
9262 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9263 		return true;
9264 	else
9265 		return false;
9266 }
9267 
9268 /**
9269  * dce8_check_latency_hiding - check latency hiding
9270  *
9271  * @wm: watermark calculation data
9272  *
9273  * Check latency hiding (CIK).
9274  * Used for display watermark bandwidth calculations
9275  * Returns true if the display fits, false if not.
9276  */
9277 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9278 {
9279 	u32 lb_partitions = wm->lb_size / wm->src_width;
9280 	u32 line_time = wm->active_time + wm->blank_time;
9281 	u32 latency_tolerant_lines;
9282 	u32 latency_hiding;
9283 	fixed20_12 a;
9284 
9285 	a.full = dfixed_const(1);
9286 	if (wm->vsc.full > a.full)
9287 		latency_tolerant_lines = 1;
9288 	else {
9289 		if (lb_partitions <= (wm->vtaps + 1))
9290 			latency_tolerant_lines = 1;
9291 		else
9292 			latency_tolerant_lines = 2;
9293 	}
9294 
9295 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9296 
9297 	if (dce8_latency_watermark(wm) <= latency_hiding)
9298 		return true;
9299 	else
9300 		return false;
9301 }
9302 
9303 /**
9304  * dce8_program_watermarks - program display watermarks
9305  *
9306  * @rdev: radeon_device pointer
9307  * @radeon_crtc: the selected display controller
9308  * @lb_size: line buffer size
9309  * @num_heads: number of display controllers in use
9310  *
9311  * Calculate and program the display watermarks for the
9312  * selected display controller (CIK).
9313  */
9314 static void dce8_program_watermarks(struct radeon_device *rdev,
9315 				    struct radeon_crtc *radeon_crtc,
9316 				    u32 lb_size, u32 num_heads)
9317 {
9318 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9319 	struct dce8_wm_params wm_low, wm_high;
9320 	u32 pixel_period;
9321 	u32 line_time = 0;
9322 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9323 	u32 tmp, wm_mask;
9324 
9325 	if (radeon_crtc->base.enabled && num_heads && mode) {
9326 		pixel_period = 1000000 / (u32)mode->clock;
9327 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9328 
9329 		/* watermark for high clocks */
9330 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9331 		    rdev->pm.dpm_enabled) {
9332 			wm_high.yclk =
9333 				radeon_dpm_get_mclk(rdev, false) * 10;
9334 			wm_high.sclk =
9335 				radeon_dpm_get_sclk(rdev, false) * 10;
9336 		} else {
9337 			wm_high.yclk = rdev->pm.current_mclk * 10;
9338 			wm_high.sclk = rdev->pm.current_sclk * 10;
9339 		}
9340 
9341 		wm_high.disp_clk = mode->clock;
9342 		wm_high.src_width = mode->crtc_hdisplay;
9343 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9344 		wm_high.blank_time = line_time - wm_high.active_time;
9345 		wm_high.interlaced = false;
9346 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9347 			wm_high.interlaced = true;
9348 		wm_high.vsc = radeon_crtc->vsc;
9349 		wm_high.vtaps = 1;
9350 		if (radeon_crtc->rmx_type != RMX_OFF)
9351 			wm_high.vtaps = 2;
9352 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9353 		wm_high.lb_size = lb_size;
9354 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9355 		wm_high.num_heads = num_heads;
9356 
9357 		/* set for high clocks */
9358 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9359 
9360 		/* possibly force display priority to high */
9361 		/* should really do this at mode validation time... */
9362 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9363 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9364 		    !dce8_check_latency_hiding(&wm_high) ||
9365 		    (rdev->disp_priority == 2)) {
9366 			DRM_DEBUG_KMS("force priority to high\n");
9367 		}
9368 
9369 		/* watermark for low clocks */
9370 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9371 		    rdev->pm.dpm_enabled) {
9372 			wm_low.yclk =
9373 				radeon_dpm_get_mclk(rdev, true) * 10;
9374 			wm_low.sclk =
9375 				radeon_dpm_get_sclk(rdev, true) * 10;
9376 		} else {
9377 			wm_low.yclk = rdev->pm.current_mclk * 10;
9378 			wm_low.sclk = rdev->pm.current_sclk * 10;
9379 		}
9380 
9381 		wm_low.disp_clk = mode->clock;
9382 		wm_low.src_width = mode->crtc_hdisplay;
9383 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9384 		wm_low.blank_time = line_time - wm_low.active_time;
9385 		wm_low.interlaced = false;
9386 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9387 			wm_low.interlaced = true;
9388 		wm_low.vsc = radeon_crtc->vsc;
9389 		wm_low.vtaps = 1;
9390 		if (radeon_crtc->rmx_type != RMX_OFF)
9391 			wm_low.vtaps = 2;
9392 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9393 		wm_low.lb_size = lb_size;
9394 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9395 		wm_low.num_heads = num_heads;
9396 
9397 		/* set for low clocks */
9398 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9399 
9400 		/* possibly force display priority to high */
9401 		/* should really do this at mode validation time... */
9402 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9403 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9404 		    !dce8_check_latency_hiding(&wm_low) ||
9405 		    (rdev->disp_priority == 2)) {
9406 			DRM_DEBUG_KMS("force priority to high\n");
9407 		}
9408 	}
9409 
9410 	/* select wm A */
9411 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9412 	tmp = wm_mask;
9413 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9414 	tmp |= LATENCY_WATERMARK_MASK(1);
9415 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9416 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9417 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9418 		LATENCY_HIGH_WATERMARK(line_time)));
9419 	/* select wm B */
9420 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9421 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9422 	tmp |= LATENCY_WATERMARK_MASK(2);
9423 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9424 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9425 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9426 		LATENCY_HIGH_WATERMARK(line_time)));
9427 	/* restore original selection */
9428 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9429 
9430 	/* save values for DPM */
9431 	radeon_crtc->line_time = line_time;
9432 	radeon_crtc->wm_high = latency_watermark_a;
9433 	radeon_crtc->wm_low = latency_watermark_b;
9434 }
9435 
9436 /**
9437  * dce8_bandwidth_update - program display watermarks
9438  *
9439  * @rdev: radeon_device pointer
9440  *
9441  * Calculate and program the display watermarks and line
9442  * buffer allocation (CIK).
9443  */
9444 void dce8_bandwidth_update(struct radeon_device *rdev)
9445 {
9446 	struct drm_display_mode *mode = NULL;
9447 	u32 num_heads = 0, lb_size;
9448 	int i;
9449 
9450 	radeon_update_display_priority(rdev);
9451 
9452 	for (i = 0; i < rdev->num_crtc; i++) {
9453 		if (rdev->mode_info.crtcs[i]->base.enabled)
9454 			num_heads++;
9455 	}
9456 	for (i = 0; i < rdev->num_crtc; i++) {
9457 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9458 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9459 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9460 	}
9461 }
9462 
9463 /**
9464  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9465  *
9466  * @rdev: radeon_device pointer
9467  *
9468  * Fetches a GPU clock counter snapshot (SI).
9469  * Returns the 64 bit clock counter snapshot.
9470  */
9471 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9472 {
9473 	uint64_t clock;
9474 
9475 	mutex_lock(&rdev->gpu_clock_mutex);
9476 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9477 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9478 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9479 	mutex_unlock(&rdev->gpu_clock_mutex);
9480 	return clock;
9481 }
9482 
9483 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9484                               u32 cntl_reg, u32 status_reg)
9485 {
9486 	int r, i;
9487 	struct atom_clock_dividers dividers;
9488 	uint32_t tmp;
9489 
9490 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9491 					   clock, false, &dividers);
9492 	if (r)
9493 		return r;
9494 
9495 	tmp = RREG32_SMC(cntl_reg);
9496 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9497 	tmp |= dividers.post_divider;
9498 	WREG32_SMC(cntl_reg, tmp);
9499 
9500 	for (i = 0; i < 100; i++) {
9501 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9502 			break;
9503 		mdelay(10);
9504 	}
9505 	if (i == 100)
9506 		return -ETIMEDOUT;
9507 
9508 	return 0;
9509 }
9510 
9511 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9512 {
9513 	int r = 0;
9514 
9515 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9516 	if (r)
9517 		return r;
9518 
9519 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9520 	return r;
9521 }
9522 
9523 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9524 {
9525 	int r, i;
9526 	struct atom_clock_dividers dividers;
9527 	u32 tmp;
9528 
9529 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9530 					   ecclk, false, &dividers);
9531 	if (r)
9532 		return r;
9533 
9534 	for (i = 0; i < 100; i++) {
9535 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9536 			break;
9537 		mdelay(10);
9538 	}
9539 	if (i == 100)
9540 		return -ETIMEDOUT;
9541 
9542 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9543 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9544 	tmp |= dividers.post_divider;
9545 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9546 
9547 	for (i = 0; i < 100; i++) {
9548 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9549 			break;
9550 		mdelay(10);
9551 	}
9552 	if (i == 100)
9553 		return -ETIMEDOUT;
9554 
9555 	return 0;
9556 }
9557 
9558 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9559 {
9560 	struct pci_dev *root = rdev->pdev->bus->self;
9561 	int bridge_pos, gpu_pos;
9562 	u32 speed_cntl, mask, current_data_rate;
9563 	int ret, i;
9564 	u16 tmp16;
9565 
9566 	if (pci_is_root_bus(rdev->pdev->bus))
9567 		return;
9568 
9569 	if (radeon_pcie_gen2 == 0)
9570 		return;
9571 
9572 	if (rdev->flags & RADEON_IS_IGP)
9573 		return;
9574 
9575 	if (!(rdev->flags & RADEON_IS_PCIE))
9576 		return;
9577 
9578 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9579 	if (ret != 0)
9580 		return;
9581 
9582 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9583 		return;
9584 
9585 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9586 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9587 		LC_CURRENT_DATA_RATE_SHIFT;
9588 	if (mask & DRM_PCIE_SPEED_80) {
9589 		if (current_data_rate == 2) {
9590 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9591 			return;
9592 		}
9593 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9594 	} else if (mask & DRM_PCIE_SPEED_50) {
9595 		if (current_data_rate == 1) {
9596 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9597 			return;
9598 		}
9599 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9600 	}
9601 
9602 	bridge_pos = pci_pcie_cap(root);
9603 	if (!bridge_pos)
9604 		return;
9605 
9606 	gpu_pos = pci_pcie_cap(rdev->pdev);
9607 	if (!gpu_pos)
9608 		return;
9609 
9610 	if (mask & DRM_PCIE_SPEED_80) {
9611 		/* re-try equalization if gen3 is not already enabled */
9612 		if (current_data_rate != 2) {
9613 			u16 bridge_cfg, gpu_cfg;
9614 			u16 bridge_cfg2, gpu_cfg2;
9615 			u32 max_lw, current_lw, tmp;
9616 
9617 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9618 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9619 
9620 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9621 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9622 
9623 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9624 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9625 
9626 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9627 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9628 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9629 
9630 			if (current_lw < max_lw) {
9631 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9632 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9633 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9634 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9635 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9636 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9637 				}
9638 			}
9639 
9640 			for (i = 0; i < 10; i++) {
9641 				/* check status */
9642 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9643 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9644 					break;
9645 
9646 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9647 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9648 
9649 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9650 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9651 
9652 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9653 				tmp |= LC_SET_QUIESCE;
9654 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9655 
9656 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9657 				tmp |= LC_REDO_EQ;
9658 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9659 
9660 				mdelay(100);
9661 
9662 				/* linkctl */
9663 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9664 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9665 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9666 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9667 
9668 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9669 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9670 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9671 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9672 
9673 				/* linkctl2 */
9674 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9675 				tmp16 &= ~((1 << 4) | (7 << 9));
9676 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9677 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9678 
9679 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9680 				tmp16 &= ~((1 << 4) | (7 << 9));
9681 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9682 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9683 
9684 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9685 				tmp &= ~LC_SET_QUIESCE;
9686 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9687 			}
9688 		}
9689 	}
9690 
9691 	/* set the link speed */
9692 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9693 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9694 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9695 
9696 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9697 	tmp16 &= ~0xf;
9698 	if (mask & DRM_PCIE_SPEED_80)
9699 		tmp16 |= 3; /* gen3 */
9700 	else if (mask & DRM_PCIE_SPEED_50)
9701 		tmp16 |= 2; /* gen2 */
9702 	else
9703 		tmp16 |= 1; /* gen1 */
9704 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9705 
9706 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9707 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9708 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9709 
9710 	for (i = 0; i < rdev->usec_timeout; i++) {
9711 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9712 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9713 			break;
9714 		udelay(1);
9715 	}
9716 }
9717 
9718 static void cik_program_aspm(struct radeon_device *rdev)
9719 {
9720 	u32 data, orig;
9721 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9722 	bool disable_clkreq = false;
9723 
9724 	if (radeon_aspm == 0)
9725 		return;
9726 
9727 	/* XXX double check IGPs */
9728 	if (rdev->flags & RADEON_IS_IGP)
9729 		return;
9730 
9731 	if (!(rdev->flags & RADEON_IS_PCIE))
9732 		return;
9733 
9734 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9735 	data &= ~LC_XMIT_N_FTS_MASK;
9736 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9737 	if (orig != data)
9738 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9739 
9740 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9741 	data |= LC_GO_TO_RECOVERY;
9742 	if (orig != data)
9743 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9744 
9745 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9746 	data |= P_IGNORE_EDB_ERR;
9747 	if (orig != data)
9748 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9749 
9750 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9751 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9752 	data |= LC_PMI_TO_L1_DIS;
9753 	if (!disable_l0s)
9754 		data |= LC_L0S_INACTIVITY(7);
9755 
9756 	if (!disable_l1) {
9757 		data |= LC_L1_INACTIVITY(7);
9758 		data &= ~LC_PMI_TO_L1_DIS;
9759 		if (orig != data)
9760 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9761 
9762 		if (!disable_plloff_in_l1) {
9763 			bool clk_req_support;
9764 
9765 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9766 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9767 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9768 			if (orig != data)
9769 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9770 
9771 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9772 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9773 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9774 			if (orig != data)
9775 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9776 
9777 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9778 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9779 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9780 			if (orig != data)
9781 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9782 
9783 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9784 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9785 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9786 			if (orig != data)
9787 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9788 
9789 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9790 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9791 			data |= LC_DYN_LANES_PWR_STATE(3);
9792 			if (orig != data)
9793 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9794 
9795 			if (!disable_clkreq &&
9796 			    !pci_is_root_bus(rdev->pdev->bus)) {
9797 				struct pci_dev *root = rdev->pdev->bus->self;
9798 				u32 lnkcap;
9799 
9800 				clk_req_support = false;
9801 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9802 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9803 					clk_req_support = true;
9804 			} else {
9805 				clk_req_support = false;
9806 			}
9807 
9808 			if (clk_req_support) {
9809 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9810 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9811 				if (orig != data)
9812 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9813 
9814 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9815 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9816 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9817 				if (orig != data)
9818 					WREG32_SMC(THM_CLK_CNTL, data);
9819 
9820 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9821 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9822 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9823 				if (orig != data)
9824 					WREG32_SMC(MISC_CLK_CTRL, data);
9825 
9826 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9827 				data &= ~BCLK_AS_XCLK;
9828 				if (orig != data)
9829 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9830 
9831 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9832 				data &= ~FORCE_BIF_REFCLK_EN;
9833 				if (orig != data)
9834 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9835 
9836 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9837 				data &= ~MPLL_CLKOUT_SEL_MASK;
9838 				data |= MPLL_CLKOUT_SEL(4);
9839 				if (orig != data)
9840 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9841 			}
9842 		}
9843 	} else {
9844 		if (orig != data)
9845 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9846 	}
9847 
9848 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9849 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9850 	if (orig != data)
9851 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9852 
9853 	if (!disable_l0s) {
9854 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9855 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9856 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9857 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9858 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9859 				data &= ~LC_L0S_INACTIVITY_MASK;
9860 				if (orig != data)
9861 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9862 			}
9863 		}
9864 	}
9865 }
9866