xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision 93d90ad7)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 #include "radeon_kfd.h"
36 
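/*
 * Two firmware naming schemes are listed below: the uppercase names are
 * the legacy radeon firmware layout, the lowercase names are the newer
 * layout with common headers.  cik_init_microcode() requests the
 * lowercase files first and falls back to the legacy ones on failure.
 */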
37 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
46 
47 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
48 MODULE_FIRMWARE("radeon/bonaire_me.bin");
49 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
50 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
52 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
54 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
55 
56 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
65 
66 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
67 MODULE_FIRMWARE("radeon/hawaii_me.bin");
68 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
69 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
71 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
73 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
74 
75 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
81 
82 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
83 MODULE_FIRMWARE("radeon/kaveri_me.bin");
84 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
85 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
87 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
88 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
89 
90 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
91 MODULE_FIRMWARE("radeon/KABINI_me.bin");
92 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
93 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
94 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
95 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
96 
97 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
98 MODULE_FIRMWARE("radeon/kabini_me.bin");
99 MODULE_FIRMWARE("radeon/kabini_ce.bin");
100 MODULE_FIRMWARE("radeon/kabini_mec.bin");
101 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
102 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
103 
104 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
110 
111 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
112 MODULE_FIRMWARE("radeon/mullins_me.bin");
113 MODULE_FIRMWARE("radeon/mullins_ce.bin");
114 MODULE_FIRMWARE("radeon/mullins_mec.bin");
115 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
116 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
117 
118 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
119 extern void r600_ih_ring_fini(struct radeon_device *rdev);
120 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
121 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
123 extern void sumo_rlc_fini(struct radeon_device *rdev);
124 extern int sumo_rlc_init(struct radeon_device *rdev);
125 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
126 extern void si_rlc_reset(struct radeon_device *rdev);
127 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
128 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
129 extern int cik_sdma_resume(struct radeon_device *rdev);
130 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
131 extern void cik_sdma_fini(struct radeon_device *rdev);
132 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
133 static void cik_rlc_stop(struct radeon_device *rdev);
134 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
135 static void cik_program_aspm(struct radeon_device *rdev);
136 static void cik_init_pg(struct radeon_device *rdev);
137 static void cik_init_cg(struct radeon_device *rdev);
138 static void cik_fini_pg(struct radeon_device *rdev);
139 static void cik_fini_cg(struct radeon_device *rdev);
140 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
141 					  bool enable);
142 
143 /* get temperature in millidegrees */
144 int ci_get_temp(struct radeon_device *rdev)
145 {
146 	u32 temp;
147 	int actual_temp = 0;
148 
149 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
150 		CTF_TEMP_SHIFT;
151 
152 	if (temp & 0x200)
153 		actual_temp = 255;
154 	else
155 		actual_temp = temp & 0x1ff;
156 
157 	actual_temp = actual_temp * 1000;
158 
159 	return actual_temp;
160 }
161 
162 /* get temperature in millidegrees */
163 int kv_get_temp(struct radeon_device *rdev)
164 {
165 	u32 temp;
166 	int actual_temp = 0;
167 
168 	temp = RREG32_SMC(0xC0300E0C);
169 
170 	if (temp)
171 		actual_temp = (temp / 8) - 49;
172 	else
173 		actual_temp = 0;
174 
175 	actual_temp = actual_temp * 1000;
176 
177 	return actual_temp;
178 }
179 
180 /*
181  * Indirect registers accessor
182  */
183 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
184 {
185 	unsigned long flags;
186 	u32 r;
187 
188 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
189 	WREG32(PCIE_INDEX, reg);
190 	(void)RREG32(PCIE_INDEX);
191 	r = RREG32(PCIE_DATA);
192 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
193 	return r;
194 }
195 
196 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
197 {
198 	unsigned long flags;
199 
200 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
201 	WREG32(PCIE_INDEX, reg);
202 	(void)RREG32(PCIE_INDEX);
203 	WREG32(PCIE_DATA, v);
204 	(void)RREG32(PCIE_DATA);
205 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
206 }
207 
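/*
 * A minimal usage sketch (hypothetical helper): the accessors above
 * implement the usual index/data pattern, with a dummy readback after
 * each write to flush it, so a read-modify-write of an indirect PCIE
 * port register looks like this.
 */
static void cik_pciep_rmw_example(struct radeon_device *rdev,
				  u32 reg, u32 clear, u32 set)
{
	u32 tmp = cik_pciep_rreg(rdev, reg);

	tmp &= ~clear;
	tmp |= set;
	cik_pciep_wreg(rdev, reg, tmp);
}
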
208 static const u32 spectre_rlc_save_restore_register_list[] =
209 {
210 	(0x0e00 << 16) | (0xc12c >> 2),
211 	0x00000000,
212 	(0x0e00 << 16) | (0xc140 >> 2),
213 	0x00000000,
214 	(0x0e00 << 16) | (0xc150 >> 2),
215 	0x00000000,
216 	(0x0e00 << 16) | (0xc15c >> 2),
217 	0x00000000,
218 	(0x0e00 << 16) | (0xc168 >> 2),
219 	0x00000000,
220 	(0x0e00 << 16) | (0xc170 >> 2),
221 	0x00000000,
222 	(0x0e00 << 16) | (0xc178 >> 2),
223 	0x00000000,
224 	(0x0e00 << 16) | (0xc204 >> 2),
225 	0x00000000,
226 	(0x0e00 << 16) | (0xc2b4 >> 2),
227 	0x00000000,
228 	(0x0e00 << 16) | (0xc2b8 >> 2),
229 	0x00000000,
230 	(0x0e00 << 16) | (0xc2bc >> 2),
231 	0x00000000,
232 	(0x0e00 << 16) | (0xc2c0 >> 2),
233 	0x00000000,
234 	(0x0e00 << 16) | (0x8228 >> 2),
235 	0x00000000,
236 	(0x0e00 << 16) | (0x829c >> 2),
237 	0x00000000,
238 	(0x0e00 << 16) | (0x869c >> 2),
239 	0x00000000,
240 	(0x0600 << 16) | (0x98f4 >> 2),
241 	0x00000000,
242 	(0x0e00 << 16) | (0x98f8 >> 2),
243 	0x00000000,
244 	(0x0e00 << 16) | (0x9900 >> 2),
245 	0x00000000,
246 	(0x0e00 << 16) | (0xc260 >> 2),
247 	0x00000000,
248 	(0x0e00 << 16) | (0x90e8 >> 2),
249 	0x00000000,
250 	(0x0e00 << 16) | (0x3c000 >> 2),
251 	0x00000000,
252 	(0x0e00 << 16) | (0x3c00c >> 2),
253 	0x00000000,
254 	(0x0e00 << 16) | (0x8c1c >> 2),
255 	0x00000000,
256 	(0x0e00 << 16) | (0x9700 >> 2),
257 	0x00000000,
258 	(0x0e00 << 16) | (0xcd20 >> 2),
259 	0x00000000,
260 	(0x4e00 << 16) | (0xcd20 >> 2),
261 	0x00000000,
262 	(0x5e00 << 16) | (0xcd20 >> 2),
263 	0x00000000,
264 	(0x6e00 << 16) | (0xcd20 >> 2),
265 	0x00000000,
266 	(0x7e00 << 16) | (0xcd20 >> 2),
267 	0x00000000,
268 	(0x8e00 << 16) | (0xcd20 >> 2),
269 	0x00000000,
270 	(0x9e00 << 16) | (0xcd20 >> 2),
271 	0x00000000,
272 	(0xae00 << 16) | (0xcd20 >> 2),
273 	0x00000000,
274 	(0xbe00 << 16) | (0xcd20 >> 2),
275 	0x00000000,
276 	(0x0e00 << 16) | (0x89bc >> 2),
277 	0x00000000,
278 	(0x0e00 << 16) | (0x8900 >> 2),
279 	0x00000000,
280 	0x3,
281 	(0x0e00 << 16) | (0xc130 >> 2),
282 	0x00000000,
283 	(0x0e00 << 16) | (0xc134 >> 2),
284 	0x00000000,
285 	(0x0e00 << 16) | (0xc1fc >> 2),
286 	0x00000000,
287 	(0x0e00 << 16) | (0xc208 >> 2),
288 	0x00000000,
289 	(0x0e00 << 16) | (0xc264 >> 2),
290 	0x00000000,
291 	(0x0e00 << 16) | (0xc268 >> 2),
292 	0x00000000,
293 	(0x0e00 << 16) | (0xc26c >> 2),
294 	0x00000000,
295 	(0x0e00 << 16) | (0xc270 >> 2),
296 	0x00000000,
297 	(0x0e00 << 16) | (0xc274 >> 2),
298 	0x00000000,
299 	(0x0e00 << 16) | (0xc278 >> 2),
300 	0x00000000,
301 	(0x0e00 << 16) | (0xc27c >> 2),
302 	0x00000000,
303 	(0x0e00 << 16) | (0xc280 >> 2),
304 	0x00000000,
305 	(0x0e00 << 16) | (0xc284 >> 2),
306 	0x00000000,
307 	(0x0e00 << 16) | (0xc288 >> 2),
308 	0x00000000,
309 	(0x0e00 << 16) | (0xc28c >> 2),
310 	0x00000000,
311 	(0x0e00 << 16) | (0xc290 >> 2),
312 	0x00000000,
313 	(0x0e00 << 16) | (0xc294 >> 2),
314 	0x00000000,
315 	(0x0e00 << 16) | (0xc298 >> 2),
316 	0x00000000,
317 	(0x0e00 << 16) | (0xc29c >> 2),
318 	0x00000000,
319 	(0x0e00 << 16) | (0xc2a0 >> 2),
320 	0x00000000,
321 	(0x0e00 << 16) | (0xc2a4 >> 2),
322 	0x00000000,
323 	(0x0e00 << 16) | (0xc2a8 >> 2),
324 	0x00000000,
325 	(0x0e00 << 16) | (0xc2ac >> 2),
326 	0x00000000,
327 	(0x0e00 << 16) | (0xc2b0 >> 2),
328 	0x00000000,
329 	(0x0e00 << 16) | (0x301d0 >> 2),
330 	0x00000000,
331 	(0x0e00 << 16) | (0x30238 >> 2),
332 	0x00000000,
333 	(0x0e00 << 16) | (0x30250 >> 2),
334 	0x00000000,
335 	(0x0e00 << 16) | (0x30254 >> 2),
336 	0x00000000,
337 	(0x0e00 << 16) | (0x30258 >> 2),
338 	0x00000000,
339 	(0x0e00 << 16) | (0x3025c >> 2),
340 	0x00000000,
341 	(0x4e00 << 16) | (0xc900 >> 2),
342 	0x00000000,
343 	(0x5e00 << 16) | (0xc900 >> 2),
344 	0x00000000,
345 	(0x6e00 << 16) | (0xc900 >> 2),
346 	0x00000000,
347 	(0x7e00 << 16) | (0xc900 >> 2),
348 	0x00000000,
349 	(0x8e00 << 16) | (0xc900 >> 2),
350 	0x00000000,
351 	(0x9e00 << 16) | (0xc900 >> 2),
352 	0x00000000,
353 	(0xae00 << 16) | (0xc900 >> 2),
354 	0x00000000,
355 	(0xbe00 << 16) | (0xc900 >> 2),
356 	0x00000000,
357 	(0x4e00 << 16) | (0xc904 >> 2),
358 	0x00000000,
359 	(0x5e00 << 16) | (0xc904 >> 2),
360 	0x00000000,
361 	(0x6e00 << 16) | (0xc904 >> 2),
362 	0x00000000,
363 	(0x7e00 << 16) | (0xc904 >> 2),
364 	0x00000000,
365 	(0x8e00 << 16) | (0xc904 >> 2),
366 	0x00000000,
367 	(0x9e00 << 16) | (0xc904 >> 2),
368 	0x00000000,
369 	(0xae00 << 16) | (0xc904 >> 2),
370 	0x00000000,
371 	(0xbe00 << 16) | (0xc904 >> 2),
372 	0x00000000,
373 	(0x4e00 << 16) | (0xc908 >> 2),
374 	0x00000000,
375 	(0x5e00 << 16) | (0xc908 >> 2),
376 	0x00000000,
377 	(0x6e00 << 16) | (0xc908 >> 2),
378 	0x00000000,
379 	(0x7e00 << 16) | (0xc908 >> 2),
380 	0x00000000,
381 	(0x8e00 << 16) | (0xc908 >> 2),
382 	0x00000000,
383 	(0x9e00 << 16) | (0xc908 >> 2),
384 	0x00000000,
385 	(0xae00 << 16) | (0xc908 >> 2),
386 	0x00000000,
387 	(0xbe00 << 16) | (0xc908 >> 2),
388 	0x00000000,
389 	(0x4e00 << 16) | (0xc90c >> 2),
390 	0x00000000,
391 	(0x5e00 << 16) | (0xc90c >> 2),
392 	0x00000000,
393 	(0x6e00 << 16) | (0xc90c >> 2),
394 	0x00000000,
395 	(0x7e00 << 16) | (0xc90c >> 2),
396 	0x00000000,
397 	(0x8e00 << 16) | (0xc90c >> 2),
398 	0x00000000,
399 	(0x9e00 << 16) | (0xc90c >> 2),
400 	0x00000000,
401 	(0xae00 << 16) | (0xc90c >> 2),
402 	0x00000000,
403 	(0xbe00 << 16) | (0xc90c >> 2),
404 	0x00000000,
405 	(0x4e00 << 16) | (0xc910 >> 2),
406 	0x00000000,
407 	(0x5e00 << 16) | (0xc910 >> 2),
408 	0x00000000,
409 	(0x6e00 << 16) | (0xc910 >> 2),
410 	0x00000000,
411 	(0x7e00 << 16) | (0xc910 >> 2),
412 	0x00000000,
413 	(0x8e00 << 16) | (0xc910 >> 2),
414 	0x00000000,
415 	(0x9e00 << 16) | (0xc910 >> 2),
416 	0x00000000,
417 	(0xae00 << 16) | (0xc910 >> 2),
418 	0x00000000,
419 	(0xbe00 << 16) | (0xc910 >> 2),
420 	0x00000000,
421 	(0x0e00 << 16) | (0xc99c >> 2),
422 	0x00000000,
423 	(0x0e00 << 16) | (0x9834 >> 2),
424 	0x00000000,
425 	(0x0000 << 16) | (0x30f00 >> 2),
426 	0x00000000,
427 	(0x0001 << 16) | (0x30f00 >> 2),
428 	0x00000000,
429 	(0x0000 << 16) | (0x30f04 >> 2),
430 	0x00000000,
431 	(0x0001 << 16) | (0x30f04 >> 2),
432 	0x00000000,
433 	(0x0000 << 16) | (0x30f08 >> 2),
434 	0x00000000,
435 	(0x0001 << 16) | (0x30f08 >> 2),
436 	0x00000000,
437 	(0x0000 << 16) | (0x30f0c >> 2),
438 	0x00000000,
439 	(0x0001 << 16) | (0x30f0c >> 2),
440 	0x00000000,
441 	(0x0600 << 16) | (0x9b7c >> 2),
442 	0x00000000,
443 	(0x0e00 << 16) | (0x8a14 >> 2),
444 	0x00000000,
445 	(0x0e00 << 16) | (0x8a18 >> 2),
446 	0x00000000,
447 	(0x0600 << 16) | (0x30a00 >> 2),
448 	0x00000000,
449 	(0x0e00 << 16) | (0x8bf0 >> 2),
450 	0x00000000,
451 	(0x0e00 << 16) | (0x8bcc >> 2),
452 	0x00000000,
453 	(0x0e00 << 16) | (0x8b24 >> 2),
454 	0x00000000,
455 	(0x0e00 << 16) | (0x30a04 >> 2),
456 	0x00000000,
457 	(0x0600 << 16) | (0x30a10 >> 2),
458 	0x00000000,
459 	(0x0600 << 16) | (0x30a14 >> 2),
460 	0x00000000,
461 	(0x0600 << 16) | (0x30a18 >> 2),
462 	0x00000000,
463 	(0x0600 << 16) | (0x30a2c >> 2),
464 	0x00000000,
465 	(0x0e00 << 16) | (0xc700 >> 2),
466 	0x00000000,
467 	(0x0e00 << 16) | (0xc704 >> 2),
468 	0x00000000,
469 	(0x0e00 << 16) | (0xc708 >> 2),
470 	0x00000000,
471 	(0x0e00 << 16) | (0xc768 >> 2),
472 	0x00000000,
473 	(0x0400 << 16) | (0xc770 >> 2),
474 	0x00000000,
475 	(0x0400 << 16) | (0xc774 >> 2),
476 	0x00000000,
477 	(0x0400 << 16) | (0xc778 >> 2),
478 	0x00000000,
479 	(0x0400 << 16) | (0xc77c >> 2),
480 	0x00000000,
481 	(0x0400 << 16) | (0xc780 >> 2),
482 	0x00000000,
483 	(0x0400 << 16) | (0xc784 >> 2),
484 	0x00000000,
485 	(0x0400 << 16) | (0xc788 >> 2),
486 	0x00000000,
487 	(0x0400 << 16) | (0xc78c >> 2),
488 	0x00000000,
489 	(0x0400 << 16) | (0xc798 >> 2),
490 	0x00000000,
491 	(0x0400 << 16) | (0xc79c >> 2),
492 	0x00000000,
493 	(0x0400 << 16) | (0xc7a0 >> 2),
494 	0x00000000,
495 	(0x0400 << 16) | (0xc7a4 >> 2),
496 	0x00000000,
497 	(0x0400 << 16) | (0xc7a8 >> 2),
498 	0x00000000,
499 	(0x0400 << 16) | (0xc7ac >> 2),
500 	0x00000000,
501 	(0x0400 << 16) | (0xc7b0 >> 2),
502 	0x00000000,
503 	(0x0400 << 16) | (0xc7b4 >> 2),
504 	0x00000000,
505 	(0x0e00 << 16) | (0x9100 >> 2),
506 	0x00000000,
507 	(0x0e00 << 16) | (0x3c010 >> 2),
508 	0x00000000,
509 	(0x0e00 << 16) | (0x92a8 >> 2),
510 	0x00000000,
511 	(0x0e00 << 16) | (0x92ac >> 2),
512 	0x00000000,
513 	(0x0e00 << 16) | (0x92b4 >> 2),
514 	0x00000000,
515 	(0x0e00 << 16) | (0x92b8 >> 2),
516 	0x00000000,
517 	(0x0e00 << 16) | (0x92bc >> 2),
518 	0x00000000,
519 	(0x0e00 << 16) | (0x92c0 >> 2),
520 	0x00000000,
521 	(0x0e00 << 16) | (0x92c4 >> 2),
522 	0x00000000,
523 	(0x0e00 << 16) | (0x92c8 >> 2),
524 	0x00000000,
525 	(0x0e00 << 16) | (0x92cc >> 2),
526 	0x00000000,
527 	(0x0e00 << 16) | (0x92d0 >> 2),
528 	0x00000000,
529 	(0x0e00 << 16) | (0x8c00 >> 2),
530 	0x00000000,
531 	(0x0e00 << 16) | (0x8c04 >> 2),
532 	0x00000000,
533 	(0x0e00 << 16) | (0x8c20 >> 2),
534 	0x00000000,
535 	(0x0e00 << 16) | (0x8c38 >> 2),
536 	0x00000000,
537 	(0x0e00 << 16) | (0x8c3c >> 2),
538 	0x00000000,
539 	(0x0e00 << 16) | (0xae00 >> 2),
540 	0x00000000,
541 	(0x0e00 << 16) | (0x9604 >> 2),
542 	0x00000000,
543 	(0x0e00 << 16) | (0xac08 >> 2),
544 	0x00000000,
545 	(0x0e00 << 16) | (0xac0c >> 2),
546 	0x00000000,
547 	(0x0e00 << 16) | (0xac10 >> 2),
548 	0x00000000,
549 	(0x0e00 << 16) | (0xac14 >> 2),
550 	0x00000000,
551 	(0x0e00 << 16) | (0xac58 >> 2),
552 	0x00000000,
553 	(0x0e00 << 16) | (0xac68 >> 2),
554 	0x00000000,
555 	(0x0e00 << 16) | (0xac6c >> 2),
556 	0x00000000,
557 	(0x0e00 << 16) | (0xac70 >> 2),
558 	0x00000000,
559 	(0x0e00 << 16) | (0xac74 >> 2),
560 	0x00000000,
561 	(0x0e00 << 16) | (0xac78 >> 2),
562 	0x00000000,
563 	(0x0e00 << 16) | (0xac7c >> 2),
564 	0x00000000,
565 	(0x0e00 << 16) | (0xac80 >> 2),
566 	0x00000000,
567 	(0x0e00 << 16) | (0xac84 >> 2),
568 	0x00000000,
569 	(0x0e00 << 16) | (0xac88 >> 2),
570 	0x00000000,
571 	(0x0e00 << 16) | (0xac8c >> 2),
572 	0x00000000,
573 	(0x0e00 << 16) | (0x970c >> 2),
574 	0x00000000,
575 	(0x0e00 << 16) | (0x9714 >> 2),
576 	0x00000000,
577 	(0x0e00 << 16) | (0x9718 >> 2),
578 	0x00000000,
579 	(0x0e00 << 16) | (0x971c >> 2),
580 	0x00000000,
581 	(0x0e00 << 16) | (0x31068 >> 2),
582 	0x00000000,
583 	(0x4e00 << 16) | (0x31068 >> 2),
584 	0x00000000,
585 	(0x5e00 << 16) | (0x31068 >> 2),
586 	0x00000000,
587 	(0x6e00 << 16) | (0x31068 >> 2),
588 	0x00000000,
589 	(0x7e00 << 16) | (0x31068 >> 2),
590 	0x00000000,
591 	(0x8e00 << 16) | (0x31068 >> 2),
592 	0x00000000,
593 	(0x9e00 << 16) | (0x31068 >> 2),
594 	0x00000000,
595 	(0xae00 << 16) | (0x31068 >> 2),
596 	0x00000000,
597 	(0xbe00 << 16) | (0x31068 >> 2),
598 	0x00000000,
599 	(0x0e00 << 16) | (0xcd10 >> 2),
600 	0x00000000,
601 	(0x0e00 << 16) | (0xcd14 >> 2),
602 	0x00000000,
603 	(0x0e00 << 16) | (0x88b0 >> 2),
604 	0x00000000,
605 	(0x0e00 << 16) | (0x88b4 >> 2),
606 	0x00000000,
607 	(0x0e00 << 16) | (0x88b8 >> 2),
608 	0x00000000,
609 	(0x0e00 << 16) | (0x88bc >> 2),
610 	0x00000000,
611 	(0x0400 << 16) | (0x89c0 >> 2),
612 	0x00000000,
613 	(0x0e00 << 16) | (0x88c4 >> 2),
614 	0x00000000,
615 	(0x0e00 << 16) | (0x88c8 >> 2),
616 	0x00000000,
617 	(0x0e00 << 16) | (0x88d0 >> 2),
618 	0x00000000,
619 	(0x0e00 << 16) | (0x88d4 >> 2),
620 	0x00000000,
621 	(0x0e00 << 16) | (0x88d8 >> 2),
622 	0x00000000,
623 	(0x0e00 << 16) | (0x8980 >> 2),
624 	0x00000000,
625 	(0x0e00 << 16) | (0x30938 >> 2),
626 	0x00000000,
627 	(0x0e00 << 16) | (0x3093c >> 2),
628 	0x00000000,
629 	(0x0e00 << 16) | (0x30940 >> 2),
630 	0x00000000,
631 	(0x0e00 << 16) | (0x89a0 >> 2),
632 	0x00000000,
633 	(0x0e00 << 16) | (0x30900 >> 2),
634 	0x00000000,
635 	(0x0e00 << 16) | (0x30904 >> 2),
636 	0x00000000,
637 	(0x0e00 << 16) | (0x89b4 >> 2),
638 	0x00000000,
639 	(0x0e00 << 16) | (0x3c210 >> 2),
640 	0x00000000,
641 	(0x0e00 << 16) | (0x3c214 >> 2),
642 	0x00000000,
643 	(0x0e00 << 16) | (0x3c218 >> 2),
644 	0x00000000,
645 	(0x0e00 << 16) | (0x8904 >> 2),
646 	0x00000000,
647 	0x5,
648 	(0x0e00 << 16) | (0x8c28 >> 2),
649 	(0x0e00 << 16) | (0x8c2c >> 2),
650 	(0x0e00 << 16) | (0x8c30 >> 2),
651 	(0x0e00 << 16) | (0x8c34 >> 2),
652 	(0x0e00 << 16) | (0x9600 >> 2),
653 };
654 
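/*
 * A sketch of the list encoding above, inferred from how the entries
 * are written: the low halfword of each entry is a register offset in
 * dwords (hence the ">> 2"), the high halfword is a selector (0x0e00
 * for most registers, 0x4e00..0xbe00 for per-instance copies of the
 * same register); most entries are followed by a 0x00000000
 * placeholder for the saved value.  How the RLC firmware interprets
 * the selector and the bare counts (0x3, 0x5) is an assumption left
 * to the hardware documentation.
 */
static inline u32 rlc_list_reg_offset(u32 entry)
{
	return (entry & 0xffff) << 2;	/* back to a byte offset */
}

static inline u32 rlc_list_selector(u32 entry)
{
	return entry >> 16;
}
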
655 static const u32 kalindi_rlc_save_restore_register_list[] =
656 {
657 	(0x0e00 << 16) | (0xc12c >> 2),
658 	0x00000000,
659 	(0x0e00 << 16) | (0xc140 >> 2),
660 	0x00000000,
661 	(0x0e00 << 16) | (0xc150 >> 2),
662 	0x00000000,
663 	(0x0e00 << 16) | (0xc15c >> 2),
664 	0x00000000,
665 	(0x0e00 << 16) | (0xc168 >> 2),
666 	0x00000000,
667 	(0x0e00 << 16) | (0xc170 >> 2),
668 	0x00000000,
669 	(0x0e00 << 16) | (0xc204 >> 2),
670 	0x00000000,
671 	(0x0e00 << 16) | (0xc2b4 >> 2),
672 	0x00000000,
673 	(0x0e00 << 16) | (0xc2b8 >> 2),
674 	0x00000000,
675 	(0x0e00 << 16) | (0xc2bc >> 2),
676 	0x00000000,
677 	(0x0e00 << 16) | (0xc2c0 >> 2),
678 	0x00000000,
679 	(0x0e00 << 16) | (0x8228 >> 2),
680 	0x00000000,
681 	(0x0e00 << 16) | (0x829c >> 2),
682 	0x00000000,
683 	(0x0e00 << 16) | (0x869c >> 2),
684 	0x00000000,
685 	(0x0600 << 16) | (0x98f4 >> 2),
686 	0x00000000,
687 	(0x0e00 << 16) | (0x98f8 >> 2),
688 	0x00000000,
689 	(0x0e00 << 16) | (0x9900 >> 2),
690 	0x00000000,
691 	(0x0e00 << 16) | (0xc260 >> 2),
692 	0x00000000,
693 	(0x0e00 << 16) | (0x90e8 >> 2),
694 	0x00000000,
695 	(0x0e00 << 16) | (0x3c000 >> 2),
696 	0x00000000,
697 	(0x0e00 << 16) | (0x3c00c >> 2),
698 	0x00000000,
699 	(0x0e00 << 16) | (0x8c1c >> 2),
700 	0x00000000,
701 	(0x0e00 << 16) | (0x9700 >> 2),
702 	0x00000000,
703 	(0x0e00 << 16) | (0xcd20 >> 2),
704 	0x00000000,
705 	(0x4e00 << 16) | (0xcd20 >> 2),
706 	0x00000000,
707 	(0x5e00 << 16) | (0xcd20 >> 2),
708 	0x00000000,
709 	(0x6e00 << 16) | (0xcd20 >> 2),
710 	0x00000000,
711 	(0x7e00 << 16) | (0xcd20 >> 2),
712 	0x00000000,
713 	(0x0e00 << 16) | (0x89bc >> 2),
714 	0x00000000,
715 	(0x0e00 << 16) | (0x8900 >> 2),
716 	0x00000000,
717 	0x3,
718 	(0x0e00 << 16) | (0xc130 >> 2),
719 	0x00000000,
720 	(0x0e00 << 16) | (0xc134 >> 2),
721 	0x00000000,
722 	(0x0e00 << 16) | (0xc1fc >> 2),
723 	0x00000000,
724 	(0x0e00 << 16) | (0xc208 >> 2),
725 	0x00000000,
726 	(0x0e00 << 16) | (0xc264 >> 2),
727 	0x00000000,
728 	(0x0e00 << 16) | (0xc268 >> 2),
729 	0x00000000,
730 	(0x0e00 << 16) | (0xc26c >> 2),
731 	0x00000000,
732 	(0x0e00 << 16) | (0xc270 >> 2),
733 	0x00000000,
734 	(0x0e00 << 16) | (0xc274 >> 2),
735 	0x00000000,
736 	(0x0e00 << 16) | (0xc28c >> 2),
737 	0x00000000,
738 	(0x0e00 << 16) | (0xc290 >> 2),
739 	0x00000000,
740 	(0x0e00 << 16) | (0xc294 >> 2),
741 	0x00000000,
742 	(0x0e00 << 16) | (0xc298 >> 2),
743 	0x00000000,
744 	(0x0e00 << 16) | (0xc2a0 >> 2),
745 	0x00000000,
746 	(0x0e00 << 16) | (0xc2a4 >> 2),
747 	0x00000000,
748 	(0x0e00 << 16) | (0xc2a8 >> 2),
749 	0x00000000,
750 	(0x0e00 << 16) | (0xc2ac >> 2),
751 	0x00000000,
752 	(0x0e00 << 16) | (0x301d0 >> 2),
753 	0x00000000,
754 	(0x0e00 << 16) | (0x30238 >> 2),
755 	0x00000000,
756 	(0x0e00 << 16) | (0x30250 >> 2),
757 	0x00000000,
758 	(0x0e00 << 16) | (0x30254 >> 2),
759 	0x00000000,
760 	(0x0e00 << 16) | (0x30258 >> 2),
761 	0x00000000,
762 	(0x0e00 << 16) | (0x3025c >> 2),
763 	0x00000000,
764 	(0x4e00 << 16) | (0xc900 >> 2),
765 	0x00000000,
766 	(0x5e00 << 16) | (0xc900 >> 2),
767 	0x00000000,
768 	(0x6e00 << 16) | (0xc900 >> 2),
769 	0x00000000,
770 	(0x7e00 << 16) | (0xc900 >> 2),
771 	0x00000000,
772 	(0x4e00 << 16) | (0xc904 >> 2),
773 	0x00000000,
774 	(0x5e00 << 16) | (0xc904 >> 2),
775 	0x00000000,
776 	(0x6e00 << 16) | (0xc904 >> 2),
777 	0x00000000,
778 	(0x7e00 << 16) | (0xc904 >> 2),
779 	0x00000000,
780 	(0x4e00 << 16) | (0xc908 >> 2),
781 	0x00000000,
782 	(0x5e00 << 16) | (0xc908 >> 2),
783 	0x00000000,
784 	(0x6e00 << 16) | (0xc908 >> 2),
785 	0x00000000,
786 	(0x7e00 << 16) | (0xc908 >> 2),
787 	0x00000000,
788 	(0x4e00 << 16) | (0xc90c >> 2),
789 	0x00000000,
790 	(0x5e00 << 16) | (0xc90c >> 2),
791 	0x00000000,
792 	(0x6e00 << 16) | (0xc90c >> 2),
793 	0x00000000,
794 	(0x7e00 << 16) | (0xc90c >> 2),
795 	0x00000000,
796 	(0x4e00 << 16) | (0xc910 >> 2),
797 	0x00000000,
798 	(0x5e00 << 16) | (0xc910 >> 2),
799 	0x00000000,
800 	(0x6e00 << 16) | (0xc910 >> 2),
801 	0x00000000,
802 	(0x7e00 << 16) | (0xc910 >> 2),
803 	0x00000000,
804 	(0x0e00 << 16) | (0xc99c >> 2),
805 	0x00000000,
806 	(0x0e00 << 16) | (0x9834 >> 2),
807 	0x00000000,
808 	(0x0000 << 16) | (0x30f00 >> 2),
809 	0x00000000,
810 	(0x0000 << 16) | (0x30f04 >> 2),
811 	0x00000000,
812 	(0x0000 << 16) | (0x30f08 >> 2),
813 	0x00000000,
814 	(0x0000 << 16) | (0x30f0c >> 2),
815 	0x00000000,
816 	(0x0600 << 16) | (0x9b7c >> 2),
817 	0x00000000,
818 	(0x0e00 << 16) | (0x8a14 >> 2),
819 	0x00000000,
820 	(0x0e00 << 16) | (0x8a18 >> 2),
821 	0x00000000,
822 	(0x0600 << 16) | (0x30a00 >> 2),
823 	0x00000000,
824 	(0x0e00 << 16) | (0x8bf0 >> 2),
825 	0x00000000,
826 	(0x0e00 << 16) | (0x8bcc >> 2),
827 	0x00000000,
828 	(0x0e00 << 16) | (0x8b24 >> 2),
829 	0x00000000,
830 	(0x0e00 << 16) | (0x30a04 >> 2),
831 	0x00000000,
832 	(0x0600 << 16) | (0x30a10 >> 2),
833 	0x00000000,
834 	(0x0600 << 16) | (0x30a14 >> 2),
835 	0x00000000,
836 	(0x0600 << 16) | (0x30a18 >> 2),
837 	0x00000000,
838 	(0x0600 << 16) | (0x30a2c >> 2),
839 	0x00000000,
840 	(0x0e00 << 16) | (0xc700 >> 2),
841 	0x00000000,
842 	(0x0e00 << 16) | (0xc704 >> 2),
843 	0x00000000,
844 	(0x0e00 << 16) | (0xc708 >> 2),
845 	0x00000000,
846 	(0x0e00 << 16) | (0xc768 >> 2),
847 	0x00000000,
848 	(0x0400 << 16) | (0xc770 >> 2),
849 	0x00000000,
850 	(0x0400 << 16) | (0xc774 >> 2),
851 	0x00000000,
852 	(0x0400 << 16) | (0xc798 >> 2),
853 	0x00000000,
854 	(0x0400 << 16) | (0xc79c >> 2),
855 	0x00000000,
856 	(0x0e00 << 16) | (0x9100 >> 2),
857 	0x00000000,
858 	(0x0e00 << 16) | (0x3c010 >> 2),
859 	0x00000000,
860 	(0x0e00 << 16) | (0x8c00 >> 2),
861 	0x00000000,
862 	(0x0e00 << 16) | (0x8c04 >> 2),
863 	0x00000000,
864 	(0x0e00 << 16) | (0x8c20 >> 2),
865 	0x00000000,
866 	(0x0e00 << 16) | (0x8c38 >> 2),
867 	0x00000000,
868 	(0x0e00 << 16) | (0x8c3c >> 2),
869 	0x00000000,
870 	(0x0e00 << 16) | (0xae00 >> 2),
871 	0x00000000,
872 	(0x0e00 << 16) | (0x9604 >> 2),
873 	0x00000000,
874 	(0x0e00 << 16) | (0xac08 >> 2),
875 	0x00000000,
876 	(0x0e00 << 16) | (0xac0c >> 2),
877 	0x00000000,
878 	(0x0e00 << 16) | (0xac10 >> 2),
879 	0x00000000,
880 	(0x0e00 << 16) | (0xac14 >> 2),
881 	0x00000000,
882 	(0x0e00 << 16) | (0xac58 >> 2),
883 	0x00000000,
884 	(0x0e00 << 16) | (0xac68 >> 2),
885 	0x00000000,
886 	(0x0e00 << 16) | (0xac6c >> 2),
887 	0x00000000,
888 	(0x0e00 << 16) | (0xac70 >> 2),
889 	0x00000000,
890 	(0x0e00 << 16) | (0xac74 >> 2),
891 	0x00000000,
892 	(0x0e00 << 16) | (0xac78 >> 2),
893 	0x00000000,
894 	(0x0e00 << 16) | (0xac7c >> 2),
895 	0x00000000,
896 	(0x0e00 << 16) | (0xac80 >> 2),
897 	0x00000000,
898 	(0x0e00 << 16) | (0xac84 >> 2),
899 	0x00000000,
900 	(0x0e00 << 16) | (0xac88 >> 2),
901 	0x00000000,
902 	(0x0e00 << 16) | (0xac8c >> 2),
903 	0x00000000,
904 	(0x0e00 << 16) | (0x970c >> 2),
905 	0x00000000,
906 	(0x0e00 << 16) | (0x9714 >> 2),
907 	0x00000000,
908 	(0x0e00 << 16) | (0x9718 >> 2),
909 	0x00000000,
910 	(0x0e00 << 16) | (0x971c >> 2),
911 	0x00000000,
912 	(0x0e00 << 16) | (0x31068 >> 2),
913 	0x00000000,
914 	(0x4e00 << 16) | (0x31068 >> 2),
915 	0x00000000,
916 	(0x5e00 << 16) | (0x31068 >> 2),
917 	0x00000000,
918 	(0x6e00 << 16) | (0x31068 >> 2),
919 	0x00000000,
920 	(0x7e00 << 16) | (0x31068 >> 2),
921 	0x00000000,
922 	(0x0e00 << 16) | (0xcd10 >> 2),
923 	0x00000000,
924 	(0x0e00 << 16) | (0xcd14 >> 2),
925 	0x00000000,
926 	(0x0e00 << 16) | (0x88b0 >> 2),
927 	0x00000000,
928 	(0x0e00 << 16) | (0x88b4 >> 2),
929 	0x00000000,
930 	(0x0e00 << 16) | (0x88b8 >> 2),
931 	0x00000000,
932 	(0x0e00 << 16) | (0x88bc >> 2),
933 	0x00000000,
934 	(0x0400 << 16) | (0x89c0 >> 2),
935 	0x00000000,
936 	(0x0e00 << 16) | (0x88c4 >> 2),
937 	0x00000000,
938 	(0x0e00 << 16) | (0x88c8 >> 2),
939 	0x00000000,
940 	(0x0e00 << 16) | (0x88d0 >> 2),
941 	0x00000000,
942 	(0x0e00 << 16) | (0x88d4 >> 2),
943 	0x00000000,
944 	(0x0e00 << 16) | (0x88d8 >> 2),
945 	0x00000000,
946 	(0x0e00 << 16) | (0x8980 >> 2),
947 	0x00000000,
948 	(0x0e00 << 16) | (0x30938 >> 2),
949 	0x00000000,
950 	(0x0e00 << 16) | (0x3093c >> 2),
951 	0x00000000,
952 	(0x0e00 << 16) | (0x30940 >> 2),
953 	0x00000000,
954 	(0x0e00 << 16) | (0x89a0 >> 2),
955 	0x00000000,
956 	(0x0e00 << 16) | (0x30900 >> 2),
957 	0x00000000,
958 	(0x0e00 << 16) | (0x30904 >> 2),
959 	0x00000000,
960 	(0x0e00 << 16) | (0x89b4 >> 2),
961 	0x00000000,
962 	(0x0e00 << 16) | (0x3e1fc >> 2),
963 	0x00000000,
964 	(0x0e00 << 16) | (0x3c210 >> 2),
965 	0x00000000,
966 	(0x0e00 << 16) | (0x3c214 >> 2),
967 	0x00000000,
968 	(0x0e00 << 16) | (0x3c218 >> 2),
969 	0x00000000,
970 	(0x0e00 << 16) | (0x8904 >> 2),
971 	0x00000000,
972 	0x5,
973 	(0x0e00 << 16) | (0x8c28 >> 2),
974 	(0x0e00 << 16) | (0x8c2c >> 2),
975 	(0x0e00 << 16) | (0x8c30 >> 2),
976 	(0x0e00 << 16) | (0x8c34 >> 2),
977 	(0x0e00 << 16) | (0x9600 >> 2),
978 };
979 
980 static const u32 bonaire_golden_spm_registers[] =
981 {
982 	0x30800, 0xe0ffffff, 0xe0000000
983 };
984 
985 static const u32 bonaire_golden_common_registers[] =
986 {
987 	0xc770, 0xffffffff, 0x00000800,
988 	0xc774, 0xffffffff, 0x00000800,
989 	0xc798, 0xffffffff, 0x00007fbf,
990 	0xc79c, 0xffffffff, 0x00007faf
991 };
992 
993 static const u32 bonaire_golden_registers[] =
994 {
995 	0x3354, 0x00000333, 0x00000333,
996 	0x3350, 0x000c0fc0, 0x00040200,
997 	0x9a10, 0x00010000, 0x00058208,
998 	0x3c000, 0xffff1fff, 0x00140000,
999 	0x3c200, 0xfdfc0fff, 0x00000100,
1000 	0x3c234, 0x40000000, 0x40000200,
1001 	0x9830, 0xffffffff, 0x00000000,
1002 	0x9834, 0xf00fffff, 0x00000400,
1003 	0x9838, 0x0002021c, 0x00020200,
1004 	0xc78, 0x00000080, 0x00000000,
1005 	0x5bb0, 0x000000f0, 0x00000070,
1006 	0x5bc0, 0xf0311fff, 0x80300000,
1007 	0x98f8, 0x73773777, 0x12010001,
1008 	0x350c, 0x00810000, 0x408af000,
1009 	0x7030, 0x31000111, 0x00000011,
1010 	0x2f48, 0x73773777, 0x12010001,
1011 	0x220c, 0x00007fb6, 0x0021a1b1,
1012 	0x2210, 0x00007fb6, 0x002021b1,
1013 	0x2180, 0x00007fb6, 0x00002191,
1014 	0x2218, 0x00007fb6, 0x002121b1,
1015 	0x221c, 0x00007fb6, 0x002021b1,
1016 	0x21dc, 0x00007fb6, 0x00002191,
1017 	0x21e0, 0x00007fb6, 0x00002191,
1018 	0x3628, 0x0000003f, 0x0000000a,
1019 	0x362c, 0x0000003f, 0x0000000a,
1020 	0x2ae4, 0x00073ffe, 0x000022a2,
1021 	0x240c, 0x000007ff, 0x00000000,
1022 	0x8a14, 0xf000003f, 0x00000007,
1023 	0x8bf0, 0x00002001, 0x00000001,
1024 	0x8b24, 0xffffffff, 0x00ffffff,
1025 	0x30a04, 0x0000ff0f, 0x00000000,
1026 	0x28a4c, 0x07ffffff, 0x06000000,
1027 	0x4d8, 0x00000fff, 0x00000100,
1028 	0x3e78, 0x00000001, 0x00000002,
1029 	0x9100, 0x03000000, 0x0362c688,
1030 	0x8c00, 0x000000ff, 0x00000001,
1031 	0xe40, 0x00001fff, 0x00001fff,
1032 	0x9060, 0x0000007f, 0x00000020,
1033 	0x9508, 0x00010000, 0x00010000,
1034 	0xac14, 0x000003ff, 0x000000f3,
1035 	0xac0c, 0xffffffff, 0x00001032
1036 };
1037 
1038 static const u32 bonaire_mgcg_cgcg_init[] =
1039 {
1040 	0xc420, 0xffffffff, 0xfffffffc,
1041 	0x30800, 0xffffffff, 0xe0000000,
1042 	0x3c2a0, 0xffffffff, 0x00000100,
1043 	0x3c208, 0xffffffff, 0x00000100,
1044 	0x3c2c0, 0xffffffff, 0xc0000100,
1045 	0x3c2c8, 0xffffffff, 0xc0000100,
1046 	0x3c2c4, 0xffffffff, 0xc0000100,
1047 	0x55e4, 0xffffffff, 0x00600100,
1048 	0x3c280, 0xffffffff, 0x00000100,
1049 	0x3c214, 0xffffffff, 0x06000100,
1050 	0x3c220, 0xffffffff, 0x00000100,
1051 	0x3c218, 0xffffffff, 0x06000100,
1052 	0x3c204, 0xffffffff, 0x00000100,
1053 	0x3c2e0, 0xffffffff, 0x00000100,
1054 	0x3c224, 0xffffffff, 0x00000100,
1055 	0x3c200, 0xffffffff, 0x00000100,
1056 	0x3c230, 0xffffffff, 0x00000100,
1057 	0x3c234, 0xffffffff, 0x00000100,
1058 	0x3c250, 0xffffffff, 0x00000100,
1059 	0x3c254, 0xffffffff, 0x00000100,
1060 	0x3c258, 0xffffffff, 0x00000100,
1061 	0x3c25c, 0xffffffff, 0x00000100,
1062 	0x3c260, 0xffffffff, 0x00000100,
1063 	0x3c27c, 0xffffffff, 0x00000100,
1064 	0x3c278, 0xffffffff, 0x00000100,
1065 	0x3c210, 0xffffffff, 0x06000100,
1066 	0x3c290, 0xffffffff, 0x00000100,
1067 	0x3c274, 0xffffffff, 0x00000100,
1068 	0x3c2b4, 0xffffffff, 0x00000100,
1069 	0x3c2b0, 0xffffffff, 0x00000100,
1070 	0x3c270, 0xffffffff, 0x00000100,
1071 	0x30800, 0xffffffff, 0xe0000000,
1072 	0x3c020, 0xffffffff, 0x00010000,
1073 	0x3c024, 0xffffffff, 0x00030002,
1074 	0x3c028, 0xffffffff, 0x00040007,
1075 	0x3c02c, 0xffffffff, 0x00060005,
1076 	0x3c030, 0xffffffff, 0x00090008,
1077 	0x3c034, 0xffffffff, 0x00010000,
1078 	0x3c038, 0xffffffff, 0x00030002,
1079 	0x3c03c, 0xffffffff, 0x00040007,
1080 	0x3c040, 0xffffffff, 0x00060005,
1081 	0x3c044, 0xffffffff, 0x00090008,
1082 	0x3c048, 0xffffffff, 0x00010000,
1083 	0x3c04c, 0xffffffff, 0x00030002,
1084 	0x3c050, 0xffffffff, 0x00040007,
1085 	0x3c054, 0xffffffff, 0x00060005,
1086 	0x3c058, 0xffffffff, 0x00090008,
1087 	0x3c05c, 0xffffffff, 0x00010000,
1088 	0x3c060, 0xffffffff, 0x00030002,
1089 	0x3c064, 0xffffffff, 0x00040007,
1090 	0x3c068, 0xffffffff, 0x00060005,
1091 	0x3c06c, 0xffffffff, 0x00090008,
1092 	0x3c070, 0xffffffff, 0x00010000,
1093 	0x3c074, 0xffffffff, 0x00030002,
1094 	0x3c078, 0xffffffff, 0x00040007,
1095 	0x3c07c, 0xffffffff, 0x00060005,
1096 	0x3c080, 0xffffffff, 0x00090008,
1097 	0x3c084, 0xffffffff, 0x00010000,
1098 	0x3c088, 0xffffffff, 0x00030002,
1099 	0x3c08c, 0xffffffff, 0x00040007,
1100 	0x3c090, 0xffffffff, 0x00060005,
1101 	0x3c094, 0xffffffff, 0x00090008,
1102 	0x3c098, 0xffffffff, 0x00010000,
1103 	0x3c09c, 0xffffffff, 0x00030002,
1104 	0x3c0a0, 0xffffffff, 0x00040007,
1105 	0x3c0a4, 0xffffffff, 0x00060005,
1106 	0x3c0a8, 0xffffffff, 0x00090008,
1107 	0x3c000, 0xffffffff, 0x96e00200,
1108 	0x8708, 0xffffffff, 0x00900100,
1109 	0xc424, 0xffffffff, 0x0020003f,
1110 	0x38, 0xffffffff, 0x0140001c,
1111 	0x3c, 0x000f0000, 0x000f0000,
1112 	0x220, 0xffffffff, 0xC060000C,
1113 	0x224, 0xc0000fff, 0x00000100,
1114 	0xf90, 0xffffffff, 0x00000100,
1115 	0xf98, 0x00000101, 0x00000000,
1116 	0x20a8, 0xffffffff, 0x00000104,
1117 	0x55e4, 0xff000fff, 0x00000100,
1118 	0x30cc, 0xc0000fff, 0x00000104,
1119 	0xc1e4, 0x00000001, 0x00000001,
1120 	0xd00c, 0xff000ff0, 0x00000100,
1121 	0xd80c, 0xff000ff0, 0x00000100
1122 };
1123 
1124 static const u32 spectre_golden_spm_registers[] =
1125 {
1126 	0x30800, 0xe0ffffff, 0xe0000000
1127 };
1128 
1129 static const u32 spectre_golden_common_registers[] =
1130 {
1131 	0xc770, 0xffffffff, 0x00000800,
1132 	0xc774, 0xffffffff, 0x00000800,
1133 	0xc798, 0xffffffff, 0x00007fbf,
1134 	0xc79c, 0xffffffff, 0x00007faf
1135 };
1136 
1137 static const u32 spectre_golden_registers[] =
1138 {
1139 	0x3c000, 0xffff1fff, 0x96940200,
1140 	0x3c00c, 0xffff0001, 0xff000000,
1141 	0x3c200, 0xfffc0fff, 0x00000100,
1142 	0x6ed8, 0x00010101, 0x00010000,
1143 	0x9834, 0xf00fffff, 0x00000400,
1144 	0x9838, 0xfffffffc, 0x00020200,
1145 	0x5bb0, 0x000000f0, 0x00000070,
1146 	0x5bc0, 0xf0311fff, 0x80300000,
1147 	0x98f8, 0x73773777, 0x12010001,
1148 	0x9b7c, 0x00ff0000, 0x00fc0000,
1149 	0x2f48, 0x73773777, 0x12010001,
1150 	0x8a14, 0xf000003f, 0x00000007,
1151 	0x8b24, 0xffffffff, 0x00ffffff,
1152 	0x28350, 0x3f3f3fff, 0x00000082,
1153 	0x28354, 0x0000003f, 0x00000000,
1154 	0x3e78, 0x00000001, 0x00000002,
1155 	0x913c, 0xffff03df, 0x00000004,
1156 	0xc768, 0x00000008, 0x00000008,
1157 	0x8c00, 0x000008ff, 0x00000800,
1158 	0x9508, 0x00010000, 0x00010000,
1159 	0xac0c, 0xffffffff, 0x54763210,
1160 	0x214f8, 0x01ff01ff, 0x00000002,
1161 	0x21498, 0x007ff800, 0x00200000,
1162 	0x2015c, 0xffffffff, 0x00000f40,
1163 	0x30934, 0xffffffff, 0x00000001
1164 };
1165 
1166 static const u32 spectre_mgcg_cgcg_init[] =
1167 {
1168 	0xc420, 0xffffffff, 0xfffffffc,
1169 	0x30800, 0xffffffff, 0xe0000000,
1170 	0x3c2a0, 0xffffffff, 0x00000100,
1171 	0x3c208, 0xffffffff, 0x00000100,
1172 	0x3c2c0, 0xffffffff, 0x00000100,
1173 	0x3c2c8, 0xffffffff, 0x00000100,
1174 	0x3c2c4, 0xffffffff, 0x00000100,
1175 	0x55e4, 0xffffffff, 0x00600100,
1176 	0x3c280, 0xffffffff, 0x00000100,
1177 	0x3c214, 0xffffffff, 0x06000100,
1178 	0x3c220, 0xffffffff, 0x00000100,
1179 	0x3c218, 0xffffffff, 0x06000100,
1180 	0x3c204, 0xffffffff, 0x00000100,
1181 	0x3c2e0, 0xffffffff, 0x00000100,
1182 	0x3c224, 0xffffffff, 0x00000100,
1183 	0x3c200, 0xffffffff, 0x00000100,
1184 	0x3c230, 0xffffffff, 0x00000100,
1185 	0x3c234, 0xffffffff, 0x00000100,
1186 	0x3c250, 0xffffffff, 0x00000100,
1187 	0x3c254, 0xffffffff, 0x00000100,
1188 	0x3c258, 0xffffffff, 0x00000100,
1189 	0x3c25c, 0xffffffff, 0x00000100,
1190 	0x3c260, 0xffffffff, 0x00000100,
1191 	0x3c27c, 0xffffffff, 0x00000100,
1192 	0x3c278, 0xffffffff, 0x00000100,
1193 	0x3c210, 0xffffffff, 0x06000100,
1194 	0x3c290, 0xffffffff, 0x00000100,
1195 	0x3c274, 0xffffffff, 0x00000100,
1196 	0x3c2b4, 0xffffffff, 0x00000100,
1197 	0x3c2b0, 0xffffffff, 0x00000100,
1198 	0x3c270, 0xffffffff, 0x00000100,
1199 	0x30800, 0xffffffff, 0xe0000000,
1200 	0x3c020, 0xffffffff, 0x00010000,
1201 	0x3c024, 0xffffffff, 0x00030002,
1202 	0x3c028, 0xffffffff, 0x00040007,
1203 	0x3c02c, 0xffffffff, 0x00060005,
1204 	0x3c030, 0xffffffff, 0x00090008,
1205 	0x3c034, 0xffffffff, 0x00010000,
1206 	0x3c038, 0xffffffff, 0x00030002,
1207 	0x3c03c, 0xffffffff, 0x00040007,
1208 	0x3c040, 0xffffffff, 0x00060005,
1209 	0x3c044, 0xffffffff, 0x00090008,
1210 	0x3c048, 0xffffffff, 0x00010000,
1211 	0x3c04c, 0xffffffff, 0x00030002,
1212 	0x3c050, 0xffffffff, 0x00040007,
1213 	0x3c054, 0xffffffff, 0x00060005,
1214 	0x3c058, 0xffffffff, 0x00090008,
1215 	0x3c05c, 0xffffffff, 0x00010000,
1216 	0x3c060, 0xffffffff, 0x00030002,
1217 	0x3c064, 0xffffffff, 0x00040007,
1218 	0x3c068, 0xffffffff, 0x00060005,
1219 	0x3c06c, 0xffffffff, 0x00090008,
1220 	0x3c070, 0xffffffff, 0x00010000,
1221 	0x3c074, 0xffffffff, 0x00030002,
1222 	0x3c078, 0xffffffff, 0x00040007,
1223 	0x3c07c, 0xffffffff, 0x00060005,
1224 	0x3c080, 0xffffffff, 0x00090008,
1225 	0x3c084, 0xffffffff, 0x00010000,
1226 	0x3c088, 0xffffffff, 0x00030002,
1227 	0x3c08c, 0xffffffff, 0x00040007,
1228 	0x3c090, 0xffffffff, 0x00060005,
1229 	0x3c094, 0xffffffff, 0x00090008,
1230 	0x3c098, 0xffffffff, 0x00010000,
1231 	0x3c09c, 0xffffffff, 0x00030002,
1232 	0x3c0a0, 0xffffffff, 0x00040007,
1233 	0x3c0a4, 0xffffffff, 0x00060005,
1234 	0x3c0a8, 0xffffffff, 0x00090008,
1235 	0x3c0ac, 0xffffffff, 0x00010000,
1236 	0x3c0b0, 0xffffffff, 0x00030002,
1237 	0x3c0b4, 0xffffffff, 0x00040007,
1238 	0x3c0b8, 0xffffffff, 0x00060005,
1239 	0x3c0bc, 0xffffffff, 0x00090008,
1240 	0x3c000, 0xffffffff, 0x96e00200,
1241 	0x8708, 0xffffffff, 0x00900100,
1242 	0xc424, 0xffffffff, 0x0020003f,
1243 	0x38, 0xffffffff, 0x0140001c,
1244 	0x3c, 0x000f0000, 0x000f0000,
1245 	0x220, 0xffffffff, 0xC060000C,
1246 	0x224, 0xc0000fff, 0x00000100,
1247 	0xf90, 0xffffffff, 0x00000100,
1248 	0xf98, 0x00000101, 0x00000000,
1249 	0x20a8, 0xffffffff, 0x00000104,
1250 	0x55e4, 0xff000fff, 0x00000100,
1251 	0x30cc, 0xc0000fff, 0x00000104,
1252 	0xc1e4, 0x00000001, 0x00000001,
1253 	0xd00c, 0xff000ff0, 0x00000100,
1254 	0xd80c, 0xff000ff0, 0x00000100
1255 };
1256 
1257 static const u32 kalindi_golden_spm_registers[] =
1258 {
1259 	0x30800, 0xe0ffffff, 0xe0000000
1260 };
1261 
1262 static const u32 kalindi_golden_common_registers[] =
1263 {
1264 	0xc770, 0xffffffff, 0x00000800,
1265 	0xc774, 0xffffffff, 0x00000800,
1266 	0xc798, 0xffffffff, 0x00007fbf,
1267 	0xc79c, 0xffffffff, 0x00007faf
1268 };
1269 
1270 static const u32 kalindi_golden_registers[] =
1271 {
1272 	0x3c000, 0xffffdfff, 0x6e944040,
1273 	0x55e4, 0xff607fff, 0xfc000100,
1274 	0x3c220, 0xff000fff, 0x00000100,
1275 	0x3c224, 0xff000fff, 0x00000100,
1276 	0x3c200, 0xfffc0fff, 0x00000100,
1277 	0x6ed8, 0x00010101, 0x00010000,
1278 	0x9830, 0xffffffff, 0x00000000,
1279 	0x9834, 0xf00fffff, 0x00000400,
1280 	0x5bb0, 0x000000f0, 0x00000070,
1281 	0x5bc0, 0xf0311fff, 0x80300000,
1282 	0x98f8, 0x73773777, 0x12010001,
1283 	0x98fc, 0xffffffff, 0x00000010,
1284 	0x9b7c, 0x00ff0000, 0x00fc0000,
1285 	0x8030, 0x00001f0f, 0x0000100a,
1286 	0x2f48, 0x73773777, 0x12010001,
1287 	0x2408, 0x000fffff, 0x000c007f,
1288 	0x8a14, 0xf000003f, 0x00000007,
1289 	0x8b24, 0x3fff3fff, 0x00ffcfff,
1290 	0x30a04, 0x0000ff0f, 0x00000000,
1291 	0x28a4c, 0x07ffffff, 0x06000000,
1292 	0x4d8, 0x00000fff, 0x00000100,
1293 	0x3e78, 0x00000001, 0x00000002,
1294 	0xc768, 0x00000008, 0x00000008,
1295 	0x8c00, 0x000000ff, 0x00000003,
1296 	0x214f8, 0x01ff01ff, 0x00000002,
1297 	0x21498, 0x007ff800, 0x00200000,
1298 	0x2015c, 0xffffffff, 0x00000f40,
1299 	0x88c4, 0x001f3ae3, 0x00000082,
1300 	0x88d4, 0x0000001f, 0x00000010,
1301 	0x30934, 0xffffffff, 0x00000000
1302 };
1303 
1304 static const u32 kalindi_mgcg_cgcg_init[] =
1305 {
1306 	0xc420, 0xffffffff, 0xfffffffc,
1307 	0x30800, 0xffffffff, 0xe0000000,
1308 	0x3c2a0, 0xffffffff, 0x00000100,
1309 	0x3c208, 0xffffffff, 0x00000100,
1310 	0x3c2c0, 0xffffffff, 0x00000100,
1311 	0x3c2c8, 0xffffffff, 0x00000100,
1312 	0x3c2c4, 0xffffffff, 0x00000100,
1313 	0x55e4, 0xffffffff, 0x00600100,
1314 	0x3c280, 0xffffffff, 0x00000100,
1315 	0x3c214, 0xffffffff, 0x06000100,
1316 	0x3c220, 0xffffffff, 0x00000100,
1317 	0x3c218, 0xffffffff, 0x06000100,
1318 	0x3c204, 0xffffffff, 0x00000100,
1319 	0x3c2e0, 0xffffffff, 0x00000100,
1320 	0x3c224, 0xffffffff, 0x00000100,
1321 	0x3c200, 0xffffffff, 0x00000100,
1322 	0x3c230, 0xffffffff, 0x00000100,
1323 	0x3c234, 0xffffffff, 0x00000100,
1324 	0x3c250, 0xffffffff, 0x00000100,
1325 	0x3c254, 0xffffffff, 0x00000100,
1326 	0x3c258, 0xffffffff, 0x00000100,
1327 	0x3c25c, 0xffffffff, 0x00000100,
1328 	0x3c260, 0xffffffff, 0x00000100,
1329 	0x3c27c, 0xffffffff, 0x00000100,
1330 	0x3c278, 0xffffffff, 0x00000100,
1331 	0x3c210, 0xffffffff, 0x06000100,
1332 	0x3c290, 0xffffffff, 0x00000100,
1333 	0x3c274, 0xffffffff, 0x00000100,
1334 	0x3c2b4, 0xffffffff, 0x00000100,
1335 	0x3c2b0, 0xffffffff, 0x00000100,
1336 	0x3c270, 0xffffffff, 0x00000100,
1337 	0x30800, 0xffffffff, 0xe0000000,
1338 	0x3c020, 0xffffffff, 0x00010000,
1339 	0x3c024, 0xffffffff, 0x00030002,
1340 	0x3c028, 0xffffffff, 0x00040007,
1341 	0x3c02c, 0xffffffff, 0x00060005,
1342 	0x3c030, 0xffffffff, 0x00090008,
1343 	0x3c034, 0xffffffff, 0x00010000,
1344 	0x3c038, 0xffffffff, 0x00030002,
1345 	0x3c03c, 0xffffffff, 0x00040007,
1346 	0x3c040, 0xffffffff, 0x00060005,
1347 	0x3c044, 0xffffffff, 0x00090008,
1348 	0x3c000, 0xffffffff, 0x96e00200,
1349 	0x8708, 0xffffffff, 0x00900100,
1350 	0xc424, 0xffffffff, 0x0020003f,
1351 	0x38, 0xffffffff, 0x0140001c,
1352 	0x3c, 0x000f0000, 0x000f0000,
1353 	0x220, 0xffffffff, 0xC060000C,
1354 	0x224, 0xc0000fff, 0x00000100,
1355 	0x20a8, 0xffffffff, 0x00000104,
1356 	0x55e4, 0xff000fff, 0x00000100,
1357 	0x30cc, 0xc0000fff, 0x00000104,
1358 	0xc1e4, 0x00000001, 0x00000001,
1359 	0xd00c, 0xff000ff0, 0x00000100,
1360 	0xd80c, 0xff000ff0, 0x00000100
1361 };
1362 
1363 static const u32 hawaii_golden_spm_registers[] =
1364 {
1365 	0x30800, 0xe0ffffff, 0xe0000000
1366 };
1367 
1368 static const u32 hawaii_golden_common_registers[] =
1369 {
1370 	0x30800, 0xffffffff, 0xe0000000,
1371 	0x28350, 0xffffffff, 0x3a00161a,
1372 	0x28354, 0xffffffff, 0x0000002e,
1373 	0x9a10, 0xffffffff, 0x00018208,
1374 	0x98f8, 0xffffffff, 0x12011003
1375 };
1376 
1377 static const u32 hawaii_golden_registers[] =
1378 {
1379 	0x3354, 0x00000333, 0x00000333,
1380 	0x9a10, 0x00010000, 0x00058208,
1381 	0x9830, 0xffffffff, 0x00000000,
1382 	0x9834, 0xf00fffff, 0x00000400,
1383 	0x9838, 0x0002021c, 0x00020200,
1384 	0xc78, 0x00000080, 0x00000000,
1385 	0x5bb0, 0x000000f0, 0x00000070,
1386 	0x5bc0, 0xf0311fff, 0x80300000,
1387 	0x350c, 0x00810000, 0x408af000,
1388 	0x7030, 0x31000111, 0x00000011,
1389 	0x2f48, 0x73773777, 0x12010001,
1390 	0x2120, 0x0000007f, 0x0000001b,
1391 	0x21dc, 0x00007fb6, 0x00002191,
1392 	0x3628, 0x0000003f, 0x0000000a,
1393 	0x362c, 0x0000003f, 0x0000000a,
1394 	0x2ae4, 0x00073ffe, 0x000022a2,
1395 	0x240c, 0x000007ff, 0x00000000,
1396 	0x8bf0, 0x00002001, 0x00000001,
1397 	0x8b24, 0xffffffff, 0x00ffffff,
1398 	0x30a04, 0x0000ff0f, 0x00000000,
1399 	0x28a4c, 0x07ffffff, 0x06000000,
1400 	0x3e78, 0x00000001, 0x00000002,
1401 	0xc768, 0x00000008, 0x00000008,
1402 	0xc770, 0x00000f00, 0x00000800,
1403 	0xc774, 0x00000f00, 0x00000800,
1404 	0xc798, 0x00ffffff, 0x00ff7fbf,
1405 	0xc79c, 0x00ffffff, 0x00ff7faf,
1406 	0x8c00, 0x000000ff, 0x00000800,
1407 	0xe40, 0x00001fff, 0x00001fff,
1408 	0x9060, 0x0000007f, 0x00000020,
1409 	0x9508, 0x00010000, 0x00010000,
1410 	0xae00, 0x00100000, 0x000ff07c,
1411 	0xac14, 0x000003ff, 0x0000000f,
1412 	0xac10, 0xffffffff, 0x7564fdec,
1413 	0xac0c, 0xffffffff, 0x3120b9a8,
1414 	0xac08, 0x20000000, 0x0f9c0000
1415 };
1416 
1417 static const u32 hawaii_mgcg_cgcg_init[] =
1418 {
1419 	0xc420, 0xffffffff, 0xfffffffd,
1420 	0x30800, 0xffffffff, 0xe0000000,
1421 	0x3c2a0, 0xffffffff, 0x00000100,
1422 	0x3c208, 0xffffffff, 0x00000100,
1423 	0x3c2c0, 0xffffffff, 0x00000100,
1424 	0x3c2c8, 0xffffffff, 0x00000100,
1425 	0x3c2c4, 0xffffffff, 0x00000100,
1426 	0x55e4, 0xffffffff, 0x00200100,
1427 	0x3c280, 0xffffffff, 0x00000100,
1428 	0x3c214, 0xffffffff, 0x06000100,
1429 	0x3c220, 0xffffffff, 0x00000100,
1430 	0x3c218, 0xffffffff, 0x06000100,
1431 	0x3c204, 0xffffffff, 0x00000100,
1432 	0x3c2e0, 0xffffffff, 0x00000100,
1433 	0x3c224, 0xffffffff, 0x00000100,
1434 	0x3c200, 0xffffffff, 0x00000100,
1435 	0x3c230, 0xffffffff, 0x00000100,
1436 	0x3c234, 0xffffffff, 0x00000100,
1437 	0x3c250, 0xffffffff, 0x00000100,
1438 	0x3c254, 0xffffffff, 0x00000100,
1439 	0x3c258, 0xffffffff, 0x00000100,
1440 	0x3c25c, 0xffffffff, 0x00000100,
1441 	0x3c260, 0xffffffff, 0x00000100,
1442 	0x3c27c, 0xffffffff, 0x00000100,
1443 	0x3c278, 0xffffffff, 0x00000100,
1444 	0x3c210, 0xffffffff, 0x06000100,
1445 	0x3c290, 0xffffffff, 0x00000100,
1446 	0x3c274, 0xffffffff, 0x00000100,
1447 	0x3c2b4, 0xffffffff, 0x00000100,
1448 	0x3c2b0, 0xffffffff, 0x00000100,
1449 	0x3c270, 0xffffffff, 0x00000100,
1450 	0x30800, 0xffffffff, 0xe0000000,
1451 	0x3c020, 0xffffffff, 0x00010000,
1452 	0x3c024, 0xffffffff, 0x00030002,
1453 	0x3c028, 0xffffffff, 0x00040007,
1454 	0x3c02c, 0xffffffff, 0x00060005,
1455 	0x3c030, 0xffffffff, 0x00090008,
1456 	0x3c034, 0xffffffff, 0x00010000,
1457 	0x3c038, 0xffffffff, 0x00030002,
1458 	0x3c03c, 0xffffffff, 0x00040007,
1459 	0x3c040, 0xffffffff, 0x00060005,
1460 	0x3c044, 0xffffffff, 0x00090008,
1461 	0x3c048, 0xffffffff, 0x00010000,
1462 	0x3c04c, 0xffffffff, 0x00030002,
1463 	0x3c050, 0xffffffff, 0x00040007,
1464 	0x3c054, 0xffffffff, 0x00060005,
1465 	0x3c058, 0xffffffff, 0x00090008,
1466 	0x3c05c, 0xffffffff, 0x00010000,
1467 	0x3c060, 0xffffffff, 0x00030002,
1468 	0x3c064, 0xffffffff, 0x00040007,
1469 	0x3c068, 0xffffffff, 0x00060005,
1470 	0x3c06c, 0xffffffff, 0x00090008,
1471 	0x3c070, 0xffffffff, 0x00010000,
1472 	0x3c074, 0xffffffff, 0x00030002,
1473 	0x3c078, 0xffffffff, 0x00040007,
1474 	0x3c07c, 0xffffffff, 0x00060005,
1475 	0x3c080, 0xffffffff, 0x00090008,
1476 	0x3c084, 0xffffffff, 0x00010000,
1477 	0x3c088, 0xffffffff, 0x00030002,
1478 	0x3c08c, 0xffffffff, 0x00040007,
1479 	0x3c090, 0xffffffff, 0x00060005,
1480 	0x3c094, 0xffffffff, 0x00090008,
1481 	0x3c098, 0xffffffff, 0x00010000,
1482 	0x3c09c, 0xffffffff, 0x00030002,
1483 	0x3c0a0, 0xffffffff, 0x00040007,
1484 	0x3c0a4, 0xffffffff, 0x00060005,
1485 	0x3c0a8, 0xffffffff, 0x00090008,
1486 	0x3c0ac, 0xffffffff, 0x00010000,
1487 	0x3c0b0, 0xffffffff, 0x00030002,
1488 	0x3c0b4, 0xffffffff, 0x00040007,
1489 	0x3c0b8, 0xffffffff, 0x00060005,
1490 	0x3c0bc, 0xffffffff, 0x00090008,
1491 	0x3c0c0, 0xffffffff, 0x00010000,
1492 	0x3c0c4, 0xffffffff, 0x00030002,
1493 	0x3c0c8, 0xffffffff, 0x00040007,
1494 	0x3c0cc, 0xffffffff, 0x00060005,
1495 	0x3c0d0, 0xffffffff, 0x00090008,
1496 	0x3c0d4, 0xffffffff, 0x00010000,
1497 	0x3c0d8, 0xffffffff, 0x00030002,
1498 	0x3c0dc, 0xffffffff, 0x00040007,
1499 	0x3c0e0, 0xffffffff, 0x00060005,
1500 	0x3c0e4, 0xffffffff, 0x00090008,
1501 	0x3c0e8, 0xffffffff, 0x00010000,
1502 	0x3c0ec, 0xffffffff, 0x00030002,
1503 	0x3c0f0, 0xffffffff, 0x00040007,
1504 	0x3c0f4, 0xffffffff, 0x00060005,
1505 	0x3c0f8, 0xffffffff, 0x00090008,
1506 	0xc318, 0xffffffff, 0x00020200,
1507 	0x3350, 0xffffffff, 0x00000200,
1508 	0x15c0, 0xffffffff, 0x00000400,
1509 	0x55e8, 0xffffffff, 0x00000000,
1510 	0x2f50, 0xffffffff, 0x00000902,
1511 	0x3c000, 0xffffffff, 0x96940200,
1512 	0x8708, 0xffffffff, 0x00900100,
1513 	0xc424, 0xffffffff, 0x0020003f,
1514 	0x38, 0xffffffff, 0x0140001c,
1515 	0x3c, 0x000f0000, 0x000f0000,
1516 	0x220, 0xffffffff, 0xc060000c,
1517 	0x224, 0xc0000fff, 0x00000100,
1518 	0xf90, 0xffffffff, 0x00000100,
1519 	0xf98, 0x00000101, 0x00000000,
1520 	0x20a8, 0xffffffff, 0x00000104,
1521 	0x55e4, 0xff000fff, 0x00000100,
1522 	0x30cc, 0xc0000fff, 0x00000104,
1523 	0xc1e4, 0x00000001, 0x00000001,
1524 	0xd00c, 0xff000ff0, 0x00000100,
1525 	0xd80c, 0xff000ff0, 0x00000100
1526 };
1527 
1528 static const u32 godavari_golden_registers[] =
1529 {
1530 	0x55e4, 0xff607fff, 0xfc000100,
1531 	0x6ed8, 0x00010101, 0x00010000,
1532 	0x9830, 0xffffffff, 0x00000000,
1533 	0x9834, 0xf00fffff, 0x00000400,
1534 	0x6130, 0xffffffff, 0x00010000,
1535 	0x5bb0, 0x000000f0, 0x00000070,
1536 	0x5bc0, 0xf0311fff, 0x80300000,
1537 	0x98f8, 0x73773777, 0x12010001,
1538 	0x98fc, 0xffffffff, 0x00000010,
1539 	0x8030, 0x00001f0f, 0x0000100a,
1540 	0x2f48, 0x73773777, 0x12010001,
1541 	0x2408, 0x000fffff, 0x000c007f,
1542 	0x8a14, 0xf000003f, 0x00000007,
1543 	0x8b24, 0xffffffff, 0x00ff0fff,
1544 	0x30a04, 0x0000ff0f, 0x00000000,
1545 	0x28a4c, 0x07ffffff, 0x06000000,
1546 	0x4d8, 0x00000fff, 0x00000100,
1547 	0xd014, 0x00010000, 0x00810001,
1548 	0xd814, 0x00010000, 0x00810001,
1549 	0x3e78, 0x00000001, 0x00000002,
1550 	0xc768, 0x00000008, 0x00000008,
1551 	0xc770, 0x00000f00, 0x00000800,
1552 	0xc774, 0x00000f00, 0x00000800,
1553 	0xc798, 0x00ffffff, 0x00ff7fbf,
1554 	0xc79c, 0x00ffffff, 0x00ff7faf,
1555 	0x8c00, 0x000000ff, 0x00000001,
1556 	0x214f8, 0x01ff01ff, 0x00000002,
1557 	0x21498, 0x007ff800, 0x00200000,
1558 	0x2015c, 0xffffffff, 0x00000f40,
1559 	0x88c4, 0x001f3ae3, 0x00000082,
1560 	0x88d4, 0x0000001f, 0x00000010,
1561 	0x30934, 0xffffffff, 0x00000000
1562 };
1563 
1564 
1565 static void cik_init_golden_registers(struct radeon_device *rdev)
1566 {
1567 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1568 	mutex_lock(&rdev->grbm_idx_mutex);
1569 	switch (rdev->family) {
1570 	case CHIP_BONAIRE:
1571 		radeon_program_register_sequence(rdev,
1572 						 bonaire_mgcg_cgcg_init,
1573 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1574 		radeon_program_register_sequence(rdev,
1575 						 bonaire_golden_registers,
1576 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1577 		radeon_program_register_sequence(rdev,
1578 						 bonaire_golden_common_registers,
1579 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1580 		radeon_program_register_sequence(rdev,
1581 						 bonaire_golden_spm_registers,
1582 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1583 		break;
1584 	case CHIP_KABINI:
1585 		radeon_program_register_sequence(rdev,
1586 						 kalindi_mgcg_cgcg_init,
1587 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1588 		radeon_program_register_sequence(rdev,
1589 						 kalindi_golden_registers,
1590 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1591 		radeon_program_register_sequence(rdev,
1592 						 kalindi_golden_common_registers,
1593 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1594 		radeon_program_register_sequence(rdev,
1595 						 kalindi_golden_spm_registers,
1596 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1597 		break;
1598 	case CHIP_MULLINS:
1599 		radeon_program_register_sequence(rdev,
1600 						 kalindi_mgcg_cgcg_init,
1601 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1602 		radeon_program_register_sequence(rdev,
1603 						 godavari_golden_registers,
1604 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1605 		radeon_program_register_sequence(rdev,
1606 						 kalindi_golden_common_registers,
1607 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1608 		radeon_program_register_sequence(rdev,
1609 						 kalindi_golden_spm_registers,
1610 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1611 		break;
1612 	case CHIP_KAVERI:
1613 		radeon_program_register_sequence(rdev,
1614 						 spectre_mgcg_cgcg_init,
1615 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1616 		radeon_program_register_sequence(rdev,
1617 						 spectre_golden_registers,
1618 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1619 		radeon_program_register_sequence(rdev,
1620 						 spectre_golden_common_registers,
1621 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1622 		radeon_program_register_sequence(rdev,
1623 						 spectre_golden_spm_registers,
1624 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1625 		break;
1626 	case CHIP_HAWAII:
1627 		radeon_program_register_sequence(rdev,
1628 						 hawaii_mgcg_cgcg_init,
1629 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1630 		radeon_program_register_sequence(rdev,
1631 						 hawaii_golden_registers,
1632 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1633 		radeon_program_register_sequence(rdev,
1634 						 hawaii_golden_common_registers,
1635 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1636 		radeon_program_register_sequence(rdev,
1637 						 hawaii_golden_spm_registers,
1638 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1639 		break;
1640 	default:
1641 		break;
1642 	}
1643 	mutex_unlock(&rdev->grbm_idx_mutex);
1644 }
1645 
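/*
 * A sketch of how the tables above are consumed: each table is a flat
 * list of (offset, and_mask, or_value) triplets, and
 * radeon_program_register_sequence() performs a read-modify-write per
 * triplet roughly along these lines (the real helper lives in
 * radeon_device.c; treat this as an illustration, not its source).
 */
static void cik_golden_triplets_example(struct radeon_device *rdev,
					const u32 *regs, u32 array_size)
{
	u32 i, reg, and_mask, or_mask, tmp;

	for (i = 0; i + 2 < array_size; i += 3) {
		reg = regs[i + 0];
		and_mask = regs[i + 1];
		or_mask = regs[i + 2];
		tmp = RREG32(reg);
		tmp &= ~and_mask;
		tmp |= (or_mask & and_mask);
		WREG32(reg, tmp);
	}
}
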
1646 /**
1647  * cik_get_xclk - get the xclk
1648  *
1649  * @rdev: radeon_device pointer
1650  *
1651  * Returns the reference clock used by the gfx engine
1652  * (CIK).
1653  */
1654 u32 cik_get_xclk(struct radeon_device *rdev)
1655 {
1656 	u32 reference_clock = rdev->clock.spll.reference_freq;
1657 
1658 	if (rdev->flags & RADEON_IS_IGP) {
1659 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1660 			return reference_clock / 2;
1661 	} else {
1662 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1663 			return reference_clock / 4;
1664 	}
1665 	return reference_clock;
1666 }
1667 
1668 /**
1669  * cik_mm_rdoorbell - read a doorbell dword
1670  *
1671  * @rdev: radeon_device pointer
1672  * @index: doorbell index
1673  *
1674  * Returns the value in the doorbell aperture at the
1675  * requested doorbell index (CIK).
1676  */
1677 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1678 {
1679 	if (index < rdev->doorbell.num_doorbells) {
1680 		return readl(rdev->doorbell.ptr + index);
1681 	} else {
1682 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1683 		return 0;
1684 	}
1685 }
1686 
1687 /**
1688  * cik_mm_wdoorbell - write a doorbell dword
1689  *
1690  * @rdev: radeon_device pointer
1691  * @index: doorbell index
1692  * @v: value to write
1693  *
1694  * Writes @v to the doorbell aperture at the
1695  * requested doorbell index (CIK).
1696  */
1697 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1698 {
1699 	if (index < rdev->doorbell.num_doorbells) {
1700 		writel(v, rdev->doorbell.ptr + index);
1701 	} else {
1702 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1703 	}
1704 }
1705 
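/*
 * A minimal usage sketch (hypothetical caller): a compute ring whose
 * doorbell_index was allocated with radeon_doorbell_get() notifies
 * the CP of a new write pointer with a single doorbell write.
 */
static void cik_ring_doorbell_example(struct radeon_device *rdev,
				      struct radeon_ring *ring)
{
	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
}
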
1706 #define BONAIRE_IO_MC_REGS_SIZE 36
1707 
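/*
 * Each pair in the tables below is one (MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA) write; ci_mc_load_microcode() walks them when
 * loading the legacy-format MC ucode.
 */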
1708 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1709 {
1710 	{0x00000070, 0x04400000},
1711 	{0x00000071, 0x80c01803},
1712 	{0x00000072, 0x00004004},
1713 	{0x00000073, 0x00000100},
1714 	{0x00000074, 0x00ff0000},
1715 	{0x00000075, 0x34000000},
1716 	{0x00000076, 0x08000014},
1717 	{0x00000077, 0x00cc08ec},
1718 	{0x00000078, 0x00000400},
1719 	{0x00000079, 0x00000000},
1720 	{0x0000007a, 0x04090000},
1721 	{0x0000007c, 0x00000000},
1722 	{0x0000007e, 0x4408a8e8},
1723 	{0x0000007f, 0x00000304},
1724 	{0x00000080, 0x00000000},
1725 	{0x00000082, 0x00000001},
1726 	{0x00000083, 0x00000002},
1727 	{0x00000084, 0xf3e4f400},
1728 	{0x00000085, 0x052024e3},
1729 	{0x00000087, 0x00000000},
1730 	{0x00000088, 0x01000000},
1731 	{0x0000008a, 0x1c0a0000},
1732 	{0x0000008b, 0xff010000},
1733 	{0x0000008d, 0xffffefff},
1734 	{0x0000008e, 0xfff3efff},
1735 	{0x0000008f, 0xfff3efbf},
1736 	{0x00000092, 0xf7ffffff},
1737 	{0x00000093, 0xffffff7f},
1738 	{0x00000095, 0x00101101},
1739 	{0x00000096, 0x00000fff},
1740 	{0x00000097, 0x00116fff},
1741 	{0x00000098, 0x60010000},
1742 	{0x00000099, 0x10010000},
1743 	{0x0000009a, 0x00006000},
1744 	{0x0000009b, 0x00001000},
1745 	{0x0000009f, 0x00b48000}
1746 };
1747 
1748 #define HAWAII_IO_MC_REGS_SIZE 22
1749 
1750 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1751 {
1752 	{0x0000007d, 0x40000000},
1753 	{0x0000007e, 0x40180304},
1754 	{0x0000007f, 0x0000ff00},
1755 	{0x00000081, 0x00000000},
1756 	{0x00000083, 0x00000800},
1757 	{0x00000086, 0x00000000},
1758 	{0x00000087, 0x00000100},
1759 	{0x00000088, 0x00020100},
1760 	{0x00000089, 0x00000000},
1761 	{0x0000008b, 0x00040000},
1762 	{0x0000008c, 0x00000100},
1763 	{0x0000008e, 0xff010000},
1764 	{0x00000090, 0xffffefff},
1765 	{0x00000091, 0xfff3efff},
1766 	{0x00000092, 0xfff3efbf},
1767 	{0x00000093, 0xf7ffffff},
1768 	{0x00000094, 0xffffff7f},
1769 	{0x00000095, 0x00000fff},
1770 	{0x00000096, 0x00116fff},
1771 	{0x00000097, 0x60010000},
1772 	{0x00000098, 0x10010000},
1773 	{0x0000009f, 0x00c79000}
1774 };
1775 
1777 /**
1778  * cik_srbm_select - select specific register instances
1779  *
1780  * @rdev: radeon_device pointer
1781  * @me: selected ME (micro engine)
1782  * @pipe: pipe
1783  * @queue: queue
1784  * @vmid: VMID
1785  *
1786  * Switches the currently active register instances.  Some
1787  * registers are instanced per VMID, others are instanced per
1788  * me/pipe/queue combination.
1789  */
1790 static void cik_srbm_select(struct radeon_device *rdev,
1791 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1792 {
1793 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1794 			     MEID(me & 0x3) |
1795 			     VMID(vmid & 0xf) |
1796 			     QUEUEID(queue & 0x7));
1797 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1798 }
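
/*
 * Typical calling pattern (see the compute ring setup later in this
 * file): serialize on rdev->srbm_mutex, select the wanted instance,
 * program the instanced registers, then deselect so that subsequent
 * accesses hit instance 0 again:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program per-me/pipe/queue or per-VMID registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */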
1799 
1800 /* ucode loading */
1801 /**
1802  * ci_mc_load_microcode - load MC ucode into the hw
1803  *
1804  * @rdev: radeon_device pointer
1805  *
1806  * Load the GDDR MC ucode into the hw (CIK).
1807  * Returns 0 on success, error on failure.
1808  */
1809 int ci_mc_load_microcode(struct radeon_device *rdev)
1810 {
1811 	const __be32 *fw_data = NULL;
1812 	const __le32 *new_fw_data = NULL;
1813 	u32 running, blackout = 0, tmp;
1814 	u32 *io_mc_regs = NULL;
1815 	const __le32 *new_io_mc_regs = NULL;
1816 	int i, regs_size, ucode_size;
1817 
1818 	if (!rdev->mc_fw)
1819 		return -EINVAL;
1820 
1821 	if (rdev->new_fw) {
1822 		const struct mc_firmware_header_v1_0 *hdr =
1823 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1824 
1825 		radeon_ucode_print_mc_hdr(&hdr->header);
1826 
1827 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1828 		new_io_mc_regs = (const __le32 *)
1829 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1830 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1831 		new_fw_data = (const __le32 *)
1832 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1833 	} else {
1834 		ucode_size = rdev->mc_fw->size / 4;
1835 
1836 		switch (rdev->family) {
1837 		case CHIP_BONAIRE:
1838 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1839 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1840 			break;
1841 		case CHIP_HAWAII:
1842 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1843 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1844 			break;
1845 		default:
1846 			return -EINVAL;
1847 		}
1848 		fw_data = (const __be32 *)rdev->mc_fw->data;
1849 	}
1850 
1851 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1852 
1853 	if (running == 0) {
1854 		if (running) { /* dead code: running == 0 in this branch */
1855 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1856 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1857 		}
1858 
1859 		/* reset the engine and set to writable */
1860 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1861 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1862 
1863 		/* load mc io regs */
1864 		for (i = 0; i < regs_size; i++) {
1865 			if (rdev->new_fw) {
1866 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1867 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1868 			} else {
1869 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1870 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1871 			}
1872 		}
1873 
1874 		tmp = RREG32(MC_SEQ_MISC0);
1875 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1876 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1877 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1878 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1879 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1880 		}
1881 
1882 		/* load the MC ucode */
1883 		for (i = 0; i < ucode_size; i++) {
1884 			if (rdev->new_fw)
1885 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1886 			else
1887 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1888 		}
1889 
1890 		/* put the engine back into the active state */
1891 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1892 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1893 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1894 
1895 		/* wait for training to complete */
1896 		for (i = 0; i < rdev->usec_timeout; i++) {
1897 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1898 				break;
1899 			udelay(1);
1900 		}
1901 		for (i = 0; i < rdev->usec_timeout; i++) {
1902 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1903 				break;
1904 			udelay(1);
1905 		}
1906 
1907 		if (running) /* likewise never true in the running == 0 branch */
1908 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1909 	}
1910 
1911 	return 0;
1912 }
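
/*
 * Blob layout assumed by the rdev->new_fw path above (offsets come from
 * the common header in radeon_ucode.h; all fields are little-endian):
 *
 *	header.ucode_array_offset_bytes -> ucode words,
 *	  header.ucode_size_bytes long
 *	io_debug_array_offset_bytes     -> {index, data} register pairs,
 *	  io_debug_size_bytes long (8 bytes per pair)
 */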
1913 
1914 /**
1915  * cik_init_microcode - load ucode images from disk
1916  *
1917  * @rdev: radeon_device pointer
1918  *
1919  * Use the firmware interface to load the ucode images into
1920  * the driver (not loaded into hw).
1921  * Returns 0 on success, error on failure.
1922  */
1923 static int cik_init_microcode(struct radeon_device *rdev)
1924 {
1925 	const char *chip_name;
1926 	const char *new_chip_name;
1927 	size_t pfp_req_size, me_req_size, ce_req_size,
1928 		mec_req_size, rlc_req_size, mc_req_size = 0,
1929 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1930 	char fw_name[30];
1931 	int new_fw = 0;
1932 	int err;
1933 	int num_fw;
1934 
1935 	DRM_DEBUG("\n");
1936 
1937 	switch (rdev->family) {
1938 	case CHIP_BONAIRE:
1939 		chip_name = "BONAIRE";
1940 		new_chip_name = "bonaire";
1941 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1942 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1943 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1944 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1945 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1946 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1947 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1948 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1949 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1950 		num_fw = 8;
1951 		break;
1952 	case CHIP_HAWAII:
1953 		chip_name = "HAWAII";
1954 		new_chip_name = "hawaii";
1955 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1956 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1957 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1958 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1959 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1960 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1961 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1962 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1963 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1964 		num_fw = 8;
1965 		break;
1966 	case CHIP_KAVERI:
1967 		chip_name = "KAVERI";
1968 		new_chip_name = "kaveri";
1969 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1970 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1971 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1972 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1973 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1974 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1975 		num_fw = 7;
1976 		break;
1977 	case CHIP_KABINI:
1978 		chip_name = "KABINI";
1979 		new_chip_name = "kabini";
1980 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1981 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1982 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1983 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1984 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1985 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1986 		num_fw = 6;
1987 		break;
1988 	case CHIP_MULLINS:
1989 		chip_name = "MULLINS";
1990 		new_chip_name = "mullins";
1991 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1992 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1993 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1994 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1995 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1996 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997 		num_fw = 6;
1998 		break;
1999 	default: BUG();
2000 	}
2001 
2002 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2003 
2004 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2005 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2006 	if (err) {
2007 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2008 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2009 		if (err)
2010 			goto out;
2011 		if (rdev->pfp_fw->size != pfp_req_size) {
2012 			printk(KERN_ERR
2013 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2014 			       rdev->pfp_fw->size, fw_name);
2015 			err = -EINVAL;
2016 			goto out;
2017 		}
2018 	} else {
2019 		err = radeon_ucode_validate(rdev->pfp_fw);
2020 		if (err) {
2021 			printk(KERN_ERR
2022 			       "cik_fw: validation failed for firmware \"%s\"\n",
2023 			       fw_name);
2024 			goto out;
2025 		} else {
2026 			new_fw++;
2027 		}
2028 	}
2029 
2030 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2031 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2032 	if (err) {
2033 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2034 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2035 		if (err)
2036 			goto out;
2037 		if (rdev->me_fw->size != me_req_size) {
2038 			printk(KERN_ERR
2039 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2040 			       rdev->me_fw->size, fw_name);
2041 			err = -EINVAL;
2042 		}
2043 	} else {
2044 		err = radeon_ucode_validate(rdev->me_fw);
2045 		if (err) {
2046 			printk(KERN_ERR
2047 			       "cik_fw: validation failed for firmware \"%s\"\n",
2048 			       fw_name);
2049 			goto out;
2050 		} else {
2051 			new_fw++;
2052 		}
2053 	}
2054 
2055 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2056 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2057 	if (err) {
2058 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2059 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2060 		if (err)
2061 			goto out;
2062 		if (rdev->ce_fw->size != ce_req_size) {
2063 			printk(KERN_ERR
2064 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2065 			       rdev->ce_fw->size, fw_name);
2066 			err = -EINVAL;
2067 		}
2068 	} else {
2069 		err = radeon_ucode_validate(rdev->ce_fw);
2070 		if (err) {
2071 			printk(KERN_ERR
2072 			       "cik_fw: validation failed for firmware \"%s\"\n",
2073 			       fw_name);
2074 			goto out;
2075 		} else {
2076 			new_fw++;
2077 		}
2078 	}
2079 
2080 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2081 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2082 	if (err) {
2083 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2084 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2085 		if (err)
2086 			goto out;
2087 		if (rdev->mec_fw->size != mec_req_size) {
2088 			printk(KERN_ERR
2089 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2090 			       rdev->mec_fw->size, fw_name);
2091 			err = -EINVAL;
2092 		}
2093 	} else {
2094 		err = radeon_ucode_validate(rdev->mec_fw);
2095 		if (err) {
2096 			printk(KERN_ERR
2097 			       "cik_fw: validation failed for firmware \"%s\"\n",
2098 			       fw_name);
2099 			goto out;
2100 		} else {
2101 			new_fw++;
2102 		}
2103 	}
2104 
2105 	if (rdev->family == CHIP_KAVERI) {
2106 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2107 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2108 		if (err) {
2109 			goto out;
2110 		} else {
2111 			err = radeon_ucode_validate(rdev->mec2_fw);
2112 			if (err) {
2113 				goto out;
2114 			} else {
2115 				new_fw++;
2116 			}
2117 		}
2118 	}
2119 
2120 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2121 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2122 	if (err) {
2123 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2124 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2125 		if (err)
2126 			goto out;
2127 		if (rdev->rlc_fw->size != rlc_req_size) {
2128 			printk(KERN_ERR
2129 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2130 			       rdev->rlc_fw->size, fw_name);
2131 			err = -EINVAL;
2132 		}
2133 	} else {
2134 		err = radeon_ucode_validate(rdev->rlc_fw);
2135 		if (err) {
2136 			printk(KERN_ERR
2137 			       "cik_fw: validation failed for firmware \"%s\"\n",
2138 			       fw_name);
2139 			goto out;
2140 		} else {
2141 			new_fw++;
2142 		}
2143 	}
2144 
2145 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2146 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2147 	if (err) {
2148 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2149 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2150 		if (err)
2151 			goto out;
2152 		if (rdev->sdma_fw->size != sdma_req_size) {
2153 			printk(KERN_ERR
2154 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2155 			       rdev->sdma_fw->size, fw_name);
2156 			err = -EINVAL;
2157 		}
2158 	} else {
2159 		err = radeon_ucode_validate(rdev->sdma_fw);
2160 		if (err) {
2161 			printk(KERN_ERR
2162 			       "cik_fw: validation failed for firmware \"%s\"\n",
2163 			       fw_name);
2164 			goto out;
2165 		} else {
2166 			new_fw++;
2167 		}
2168 	}
2169 
2170 	/* No SMC, MC ucode on APUs */
2171 	if (!(rdev->flags & RADEON_IS_IGP)) {
2172 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2173 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2174 		if (err) {
2175 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2176 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2177 			if (err) {
2178 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2179 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2180 				if (err)
2181 					goto out;
2182 			}
2183 			if ((rdev->mc_fw->size != mc_req_size) &&
2184 			    (rdev->mc_fw->size != mc2_req_size)) {
2185 				printk(KERN_ERR
2186 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2187 				       rdev->mc_fw->size, fw_name);
2188 				err = -EINVAL;
2189 			}
2190 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2191 		} else {
2192 			err = radeon_ucode_validate(rdev->mc_fw);
2193 			if (err) {
2194 				printk(KERN_ERR
2195 				       "cik_fw: validation failed for firmware \"%s\"\n",
2196 				       fw_name);
2197 				goto out;
2198 			} else {
2199 				new_fw++;
2200 			}
2201 		}
2202 
2203 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2204 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2205 		if (err) {
2206 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2207 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2208 			if (err) {
2209 				printk(KERN_ERR
2210 				       "smc: error loading firmware \"%s\"\n",
2211 				       fw_name);
2212 				release_firmware(rdev->smc_fw);
2213 				rdev->smc_fw = NULL;
2214 				err = 0;
2215 			} else if (rdev->smc_fw->size != smc_req_size) {
2216 				printk(KERN_ERR
2217 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2218 				       rdev->smc_fw->size, fw_name);
2219 				err = -EINVAL;
2220 			}
2221 		} else {
2222 			err = radeon_ucode_validate(rdev->smc_fw);
2223 			if (err) {
2224 				printk(KERN_ERR
2225 				       "cik_fw: validation failed for firmware \"%s\"\n",
2226 				       fw_name);
2227 				goto out;
2228 			} else {
2229 				new_fw++;
2230 			}
2231 		}
2232 	}
2233 
2234 	if (new_fw == 0) {
2235 		rdev->new_fw = false;
2236 	} else if (new_fw < num_fw) {
2237 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2238 		err = -EINVAL;
2239 	} else {
2240 		rdev->new_fw = true;
2241 	}
2242 
2243 out:
2244 	if (err) {
2245 		if (err != -EINVAL)
2246 			printk(KERN_ERR
2247 			       "cik_cp: Failed to load firmware \"%s\"\n",
2248 			       fw_name);
2249 		release_firmware(rdev->pfp_fw);
2250 		rdev->pfp_fw = NULL;
2251 		release_firmware(rdev->me_fw);
2252 		rdev->me_fw = NULL;
2253 		release_firmware(rdev->ce_fw);
2254 		rdev->ce_fw = NULL;
2255 		release_firmware(rdev->mec_fw);
2256 		rdev->mec_fw = NULL;
2257 		release_firmware(rdev->mec2_fw);
2258 		rdev->mec2_fw = NULL;
2259 		release_firmware(rdev->rlc_fw);
2260 		rdev->rlc_fw = NULL;
2261 		release_firmware(rdev->sdma_fw);
2262 		rdev->sdma_fw = NULL;
2263 		release_firmware(rdev->mc_fw);
2264 		rdev->mc_fw = NULL;
2265 		release_firmware(rdev->smc_fw);
2266 		rdev->smc_fw = NULL;
2267 	}
2268 	return err;
2269 }
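
/*
 * The request_firmware() calls above all follow the same shape: try the
 * new lower-case name first, fall back to the legacy upper-case name,
 * and validate the legacy image by size only.  A hypothetical helper
 * capturing that pattern (names are illustrative; this sketch also
 * omits the new_fw counting the real function needs, so it is left
 * compiled out):
 */
#if 0
static int cik_request_fw(struct radeon_device *rdev,
			  const struct firmware **fw,
			  const char *new_name, const char *old_name,
			  const char *suffix, size_t legacy_size)
{
	char fw_name[30];
	int err;

	/* new-style image: lower-case name, validated by header */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_%s.bin", new_name, suffix);
	err = request_firmware(fw, fw_name, rdev->dev);
	if (!err)
		return radeon_ucode_validate(*fw);

	/* legacy image: upper-case name, validated by size only */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_%s.bin", old_name, suffix);
	err = request_firmware(fw, fw_name, rdev->dev);
	if (err)
		return err;
	return ((*fw)->size == legacy_size) ? 0 : -EINVAL;
}
#endif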
2270 
2271 /*
2272  * Core functions
2273  */
2274 /**
2275  * cik_tiling_mode_table_init - init the hw tiling table
2276  *
2277  * @rdev: radeon_device pointer
2278  *
2279  * Starting with SI, the tiling setup is done globally in a
2280  * set of 32 tiling modes.  Rather than selecting each set of
2281  * parameters per surface as on older asics, we just select
2282  * which index in the tiling table we want to use, and the
2283  * surface uses those parameters (CIK).
2284  */
2285 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2286 {
2287 	const u32 num_tile_mode_states = 32;
2288 	const u32 num_secondary_tile_mode_states = 16;
2289 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2290 	u32 num_pipe_configs;
2291 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2292 		rdev->config.cik.max_shader_engines;
2293 
2294 	switch (rdev->config.cik.mem_row_size_in_kb) {
2295 	case 1:
2296 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2297 		break;
2298 	case 2:
2299 	default:
2300 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2301 		break;
2302 	case 4:
2303 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2304 		break;
2305 	}
2306 
2307 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2308 	if (num_pipe_configs > 8)
2309 		num_pipe_configs = 16;
2310 
2311 	if (num_pipe_configs == 16) {
2312 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2313 			switch (reg_offset) {
2314 			case 0:
2315 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2317 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2319 				break;
2320 			case 1:
2321 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2323 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2325 				break;
2326 			case 2:
2327 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2329 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2331 				break;
2332 			case 3:
2333 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2337 				break;
2338 			case 4:
2339 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342 						 TILE_SPLIT(split_equal_to_row_size));
2343 				break;
2344 			case 5:
2345 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2346 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348 				break;
2349 			case 6:
2350 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2351 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2352 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2354 				break;
2355 			case 7:
2356 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2357 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2358 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359 						 TILE_SPLIT(split_equal_to_row_size));
2360 				break;
2361 			case 8:
2362 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2363 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2364 				break;
2365 			case 9:
2366 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2369 				break;
2370 			case 10:
2371 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2372 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2373 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2375 				break;
2376 			case 11:
2377 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2379 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2380 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381 				break;
2382 			case 12:
2383 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2384 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2387 				break;
2388 			case 13:
2389 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2392 				break;
2393 			case 14:
2394 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2398 				break;
2399 			case 16:
2400 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2401 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2402 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2403 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404 				break;
2405 			case 17:
2406 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2407 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2408 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410 				break;
2411 			case 27:
2412 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2415 				break;
2416 			case 28:
2417 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 				break;
2422 			case 29:
2423 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2426 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427 				break;
2428 			case 30:
2429 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 				break;
2434 			default:
2435 				gb_tile_moden = 0;
2436 				break;
2437 			}
2438 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2439 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2440 		}
2441 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2442 			switch (reg_offset) {
2443 			case 0:
2444 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447 						 NUM_BANKS(ADDR_SURF_16_BANK));
2448 				break;
2449 			case 1:
2450 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2452 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453 						 NUM_BANKS(ADDR_SURF_16_BANK));
2454 				break;
2455 			case 2:
2456 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459 						 NUM_BANKS(ADDR_SURF_16_BANK));
2460 				break;
2461 			case 3:
2462 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465 						 NUM_BANKS(ADDR_SURF_16_BANK));
2466 				break;
2467 			case 4:
2468 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471 						 NUM_BANKS(ADDR_SURF_8_BANK));
2472 				break;
2473 			case 5:
2474 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477 						 NUM_BANKS(ADDR_SURF_4_BANK));
2478 				break;
2479 			case 6:
2480 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 						 NUM_BANKS(ADDR_SURF_2_BANK));
2484 				break;
2485 			case 8:
2486 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489 						 NUM_BANKS(ADDR_SURF_16_BANK));
2490 				break;
2491 			case 9:
2492 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2494 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495 						 NUM_BANKS(ADDR_SURF_16_BANK));
2496 				break;
2497 			case 10:
2498 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501 						 NUM_BANKS(ADDR_SURF_16_BANK));
2502 				break;
2503 			case 11:
2504 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507 						 NUM_BANKS(ADDR_SURF_8_BANK));
2508 				break;
2509 			case 12:
2510 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2513 						 NUM_BANKS(ADDR_SURF_4_BANK));
2514 				break;
2515 			case 13:
2516 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519 						 NUM_BANKS(ADDR_SURF_2_BANK));
2520 				break;
2521 			case 14:
2522 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2525 						 NUM_BANKS(ADDR_SURF_2_BANK));
2526 				break;
2527 			default:
2528 				gb_tile_moden = 0;
2529 				break;
2530 			}
2531 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2532 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2533 		}
2534 	} else if (num_pipe_configs == 8) {
2535 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2536 			switch (reg_offset) {
2537 			case 0:
2538 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2540 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2542 				break;
2543 			case 1:
2544 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2548 				break;
2549 			case 2:
2550 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2552 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2554 				break;
2555 			case 3:
2556 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2557 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2558 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2560 				break;
2561 			case 4:
2562 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2564 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565 						 TILE_SPLIT(split_equal_to_row_size));
2566 				break;
2567 			case 5:
2568 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2569 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571 				break;
2572 			case 6:
2573 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2574 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2575 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2577 				break;
2578 			case 7:
2579 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2580 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2581 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582 						 TILE_SPLIT(split_equal_to_row_size));
2583 				break;
2584 			case 8:
2585 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2586 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2587 				break;
2588 			case 9:
2589 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2590 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2592 				break;
2593 			case 10:
2594 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598 				break;
2599 			case 11:
2600 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2601 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2602 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2603 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604 				break;
2605 			case 12:
2606 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2607 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610 				break;
2611 			case 13:
2612 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2613 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2615 				break;
2616 			case 14:
2617 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2619 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621 				break;
2622 			case 16:
2623 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2624 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2625 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627 				break;
2628 			case 17:
2629 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2630 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2631 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633 				break;
2634 			case 27:
2635 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2638 				break;
2639 			case 28:
2640 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2642 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644 				break;
2645 			case 29:
2646 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2649 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 				break;
2651 			case 30:
2652 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656 				break;
2657 			default:
2658 				gb_tile_moden = 0;
2659 				break;
2660 			}
2661 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2662 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2663 		}
2664 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2665 			switch (reg_offset) {
2666 			case 0:
2667 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2669 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2670 						 NUM_BANKS(ADDR_SURF_16_BANK));
2671 				break;
2672 			case 1:
2673 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2675 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2676 						 NUM_BANKS(ADDR_SURF_16_BANK));
2677 				break;
2678 			case 2:
2679 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2682 						 NUM_BANKS(ADDR_SURF_16_BANK));
2683 				break;
2684 			case 3:
2685 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2687 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2688 						 NUM_BANKS(ADDR_SURF_16_BANK));
2689 				break;
2690 			case 4:
2691 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2693 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2694 						 NUM_BANKS(ADDR_SURF_8_BANK));
2695 				break;
2696 			case 5:
2697 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2699 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2700 						 NUM_BANKS(ADDR_SURF_4_BANK));
2701 				break;
2702 			case 6:
2703 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2706 						 NUM_BANKS(ADDR_SURF_2_BANK));
2707 				break;
2708 			case 8:
2709 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2711 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2712 						 NUM_BANKS(ADDR_SURF_16_BANK));
2713 				break;
2714 			case 9:
2715 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2717 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718 						 NUM_BANKS(ADDR_SURF_16_BANK));
2719 				break;
2720 			case 10:
2721 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2723 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2724 						 NUM_BANKS(ADDR_SURF_16_BANK));
2725 				break;
2726 			case 11:
2727 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2729 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2730 						 NUM_BANKS(ADDR_SURF_16_BANK));
2731 				break;
2732 			case 12:
2733 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2735 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2736 						 NUM_BANKS(ADDR_SURF_8_BANK));
2737 				break;
2738 			case 13:
2739 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2742 						 NUM_BANKS(ADDR_SURF_4_BANK));
2743 				break;
2744 			case 14:
2745 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2748 						 NUM_BANKS(ADDR_SURF_2_BANK));
2749 				break;
2750 			default:
2751 				gb_tile_moden = 0;
2752 				break;
2753 			}
2754 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2755 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2756 		}
2757 	} else if (num_pipe_configs == 4) {
2758 		if (num_rbs == 4) {
2759 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2760 				switch (reg_offset) {
2761 				case 0:
2762 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2764 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2766 					break;
2767 				case 1:
2768 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2772 					break;
2773 				case 2:
2774 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2776 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2778 					break;
2779 				case 3:
2780 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2782 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2784 					break;
2785 				case 4:
2786 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2788 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789 							 TILE_SPLIT(split_equal_to_row_size));
2790 					break;
2791 				case 5:
2792 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2793 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2795 					break;
2796 				case 6:
2797 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2798 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2799 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2801 					break;
2802 				case 7:
2803 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2805 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806 							 TILE_SPLIT(split_equal_to_row_size));
2807 					break;
2808 				case 8:
2809 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2810 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2811 					break;
2812 				case 9:
2813 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2816 					break;
2817 				case 10:
2818 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2819 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2820 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822 					break;
2823 				case 11:
2824 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2825 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2826 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2828 					break;
2829 				case 12:
2830 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2831 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2834 					break;
2835 				case 13:
2836 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2837 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2839 					break;
2840 				case 14:
2841 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2843 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2844 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2845 					break;
2846 				case 16:
2847 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2848 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851 					break;
2852 				case 17:
2853 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2854 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2855 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2856 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857 					break;
2858 				case 27:
2859 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2862 					break;
2863 				case 28:
2864 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2866 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868 					break;
2869 				case 29:
2870 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2872 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2873 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874 					break;
2875 				case 30:
2876 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2877 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2878 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2880 					break;
2881 				default:
2882 					gb_tile_moden = 0;
2883 					break;
2884 				}
2885 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2886 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2887 			}
2888 		} else if (num_rbs < 4) {
2889 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2890 				switch (reg_offset) {
2891 				case 0:
2892 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2895 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2896 					break;
2897 				case 1:
2898 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2900 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2901 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2902 					break;
2903 				case 2:
2904 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908 					break;
2909 				case 3:
2910 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2912 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2913 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2914 					break;
2915 				case 4:
2916 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2918 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2919 							 TILE_SPLIT(split_equal_to_row_size));
2920 					break;
2921 				case 5:
2922 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2923 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2924 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925 					break;
2926 				case 6:
2927 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2929 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2930 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2931 					break;
2932 				case 7:
2933 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2934 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2935 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2936 							 TILE_SPLIT(split_equal_to_row_size));
2937 					break;
2938 				case 8:
2939 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2940 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2941 					break;
2942 				case 9:
2943 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2944 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2945 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2946 					break;
2947 				case 10:
2948 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2949 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2951 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2952 					break;
2953 				case 11:
2954 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2955 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2956 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2957 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958 					break;
2959 				case 12:
2960 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2961 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2962 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2963 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964 					break;
2965 				case 13:
2966 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2967 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2968 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2969 					break;
2970 				case 14:
2971 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2972 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2974 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2975 					break;
2976 				case 16:
2977 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2978 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2979 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2980 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981 					break;
2982 				case 17:
2983 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2984 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2985 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2986 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987 					break;
2988 				case 27:
2989 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2990 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2992 					break;
2993 				case 28:
2994 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998 					break;
2999 				case 29:
3000 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3001 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3002 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3003 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3004 					break;
3005 				case 30:
3006 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3007 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3009 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010 					break;
3011 				default:
3012 					gb_tile_moden = 0;
3013 					break;
3014 				}
3015 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3016 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3017 			}
3018 		}
3019 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3020 			switch (reg_offset) {
3021 			case 0:
3022 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3024 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3025 						 NUM_BANKS(ADDR_SURF_16_BANK));
3026 				break;
3027 			case 1:
3028 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3030 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031 						 NUM_BANKS(ADDR_SURF_16_BANK));
3032 				break;
3033 			case 2:
3034 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3036 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3037 						 NUM_BANKS(ADDR_SURF_16_BANK));
3038 				break;
3039 			case 3:
3040 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3043 						 NUM_BANKS(ADDR_SURF_16_BANK));
3044 				break;
3045 			case 4:
3046 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049 						 NUM_BANKS(ADDR_SURF_16_BANK));
3050 				break;
3051 			case 5:
3052 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3054 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3055 						 NUM_BANKS(ADDR_SURF_8_BANK));
3056 				break;
3057 			case 6:
3058 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3060 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3061 						 NUM_BANKS(ADDR_SURF_4_BANK));
3062 				break;
3063 			case 8:
3064 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3065 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3066 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067 						 NUM_BANKS(ADDR_SURF_16_BANK));
3068 				break;
3069 			case 9:
3070 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3071 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3072 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3073 						 NUM_BANKS(ADDR_SURF_16_BANK));
3074 				break;
3075 			case 10:
3076 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3078 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3079 						 NUM_BANKS(ADDR_SURF_16_BANK));
3080 				break;
3081 			case 11:
3082 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3084 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3085 						 NUM_BANKS(ADDR_SURF_16_BANK));
3086 				break;
3087 			case 12:
3088 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3091 						 NUM_BANKS(ADDR_SURF_16_BANK));
3092 				break;
3093 			case 13:
3094 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3095 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3096 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3097 						 NUM_BANKS(ADDR_SURF_8_BANK));
3098 				break;
3099 			case 14:
3100 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3101 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3102 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3103 						 NUM_BANKS(ADDR_SURF_4_BANK));
3104 				break;
3105 			default:
3106 				gb_tile_moden = 0;
3107 				break;
3108 			}
3109 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3110 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3111 		}
3112 	} else if (num_pipe_configs == 2) {
3113 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3114 			switch (reg_offset) {
3115 			case 0:
3116 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3117 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3118 						 PIPE_CONFIG(ADDR_SURF_P2) |
3119 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3120 				break;
3121 			case 1:
3122 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3123 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3124 						 PIPE_CONFIG(ADDR_SURF_P2) |
3125 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3126 				break;
3127 			case 2:
3128 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3129 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3130 						 PIPE_CONFIG(ADDR_SURF_P2) |
3131 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3132 				break;
3133 			case 3:
3134 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3136 						 PIPE_CONFIG(ADDR_SURF_P2) |
3137 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3138 				break;
3139 			case 4:
3140 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3141 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3142 						 PIPE_CONFIG(ADDR_SURF_P2) |
3143 						 TILE_SPLIT(split_equal_to_row_size));
3144 				break;
3145 			case 5:
3146 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3147 						 PIPE_CONFIG(ADDR_SURF_P2) |
3148 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3149 				break;
3150 			case 6:
3151 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3152 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3153 						 PIPE_CONFIG(ADDR_SURF_P2) |
3154 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3155 				break;
3156 			case 7:
3157 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3158 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3159 						 PIPE_CONFIG(ADDR_SURF_P2) |
3160 						 TILE_SPLIT(split_equal_to_row_size));
3161 				break;
3162 			case 8:
3163 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3164 						 PIPE_CONFIG(ADDR_SURF_P2));
3165 				break;
3166 			case 9:
3167 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3168 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3169 						 PIPE_CONFIG(ADDR_SURF_P2));
3170 				break;
3171 			case 10:
3172 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3173 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3174 						 PIPE_CONFIG(ADDR_SURF_P2) |
3175 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3176 				break;
3177 			case 11:
3178 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3179 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3180 						 PIPE_CONFIG(ADDR_SURF_P2) |
3181 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3182 				break;
3183 			case 12:
3184 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3185 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3186 						 PIPE_CONFIG(ADDR_SURF_P2) |
3187 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3188 				break;
3189 			case 13:
3190 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3191 						 PIPE_CONFIG(ADDR_SURF_P2) |
3192 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3193 				break;
3194 			case 14:
3195 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3196 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3197 						 PIPE_CONFIG(ADDR_SURF_P2) |
3198 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199 				break;
3200 			case 16:
3201 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3203 						 PIPE_CONFIG(ADDR_SURF_P2) |
3204 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3205 				break;
3206 			case 17:
3207 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3208 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3209 						 PIPE_CONFIG(ADDR_SURF_P2) |
3210 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211 				break;
3212 			case 27:
3213 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3214 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3215 						 PIPE_CONFIG(ADDR_SURF_P2));
3216 				break;
3217 			case 28:
3218 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3220 						 PIPE_CONFIG(ADDR_SURF_P2) |
3221 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222 				break;
3223 			case 29:
3224 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3225 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3226 						 PIPE_CONFIG(ADDR_SURF_P2) |
3227 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3228 				break;
3229 			case 30:
3230 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3231 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3232 						 PIPE_CONFIG(ADDR_SURF_P2) |
3233 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3234 				break;
3235 			default:
3236 				gb_tile_moden = 0;
3237 				break;
3238 			}
3239 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3240 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3241 		}
3242 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3243 			switch (reg_offset) {
3244 			case 0:
3245 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3247 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248 						 NUM_BANKS(ADDR_SURF_16_BANK));
3249 				break;
3250 			case 1:
3251 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3252 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3253 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3254 						 NUM_BANKS(ADDR_SURF_16_BANK));
3255 				break;
3256 			case 2:
3257 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3259 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3260 						 NUM_BANKS(ADDR_SURF_16_BANK));
3261 				break;
3262 			case 3:
3263 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3264 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3265 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3266 						 NUM_BANKS(ADDR_SURF_16_BANK));
3267 				break;
3268 			case 4:
3269 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3270 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3271 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272 						 NUM_BANKS(ADDR_SURF_16_BANK));
3273 				break;
3274 			case 5:
3275 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3276 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3277 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3278 						 NUM_BANKS(ADDR_SURF_16_BANK));
3279 				break;
3280 			case 6:
3281 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3284 						 NUM_BANKS(ADDR_SURF_8_BANK));
3285 				break;
3286 			case 8:
3287 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3288 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3289 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3290 						 NUM_BANKS(ADDR_SURF_16_BANK));
3291 				break;
3292 			case 9:
3293 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3294 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3295 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3296 						 NUM_BANKS(ADDR_SURF_16_BANK));
3297 				break;
3298 			case 10:
3299 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3300 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3301 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3302 						 NUM_BANKS(ADDR_SURF_16_BANK));
3303 				break;
3304 			case 11:
3305 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3306 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3307 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3308 						 NUM_BANKS(ADDR_SURF_16_BANK));
3309 				break;
3310 			case 12:
3311 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3312 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3313 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3314 						 NUM_BANKS(ADDR_SURF_16_BANK));
3315 				break;
3316 			case 13:
3317 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3319 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320 						 NUM_BANKS(ADDR_SURF_16_BANK));
3321 				break;
3322 			case 14:
3323 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3326 						 NUM_BANKS(ADDR_SURF_8_BANK));
3327 				break;
3328 			default:
3329 				gb_tile_moden = 0;
3330 				break;
3331 			}
3332 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3333 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3334 		}
3335 	} else {
3336 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
	}
3337 }
3338 
3339 /**
3340  * cik_select_se_sh - select which SE, SH to address
3341  *
3342  * @rdev: radeon_device pointer
3343  * @se_num: shader engine to address
3344  * @sh_num: sh block to address
3345  *
3346  * Select which SE, SH combinations to address. Certain
3347  * registers are instanced per SE or SH.  0xffffffff means
3348  * broadcast to all SEs or SHs (CIK).
3349  */
3350 static void cik_select_se_sh(struct radeon_device *rdev,
3351 			     u32 se_num, u32 sh_num)
3352 {
3353 	u32 data = INSTANCE_BROADCAST_WRITES;
3354 
3355 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3356 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3357 	else if (se_num == 0xffffffff)
3358 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3359 	else if (sh_num == 0xffffffff)
3360 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3361 	else
3362 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3363 	WREG32(GRBM_GFX_INDEX, data);
3364 }
3365 
3366 /**
3367  * cik_create_bitmask - create a bitmask
3368  *
3369  * @bit_width: length of the mask
3370  *
3371  * create a variable length bit mask (CIK).
3372  * Returns the bitmask.
3373  */
3374 static u32 cik_create_bitmask(u32 bit_width)
3375 {
3376 	u32 i, mask = 0;
3377 
3378 	for (i = 0; i < bit_width; i++) {
3379 		mask <<= 1;
3380 		mask |= 1;
3381 	}
3382 	return mask;
3383 }
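
/*
 * A closed-form equivalent (a sketch, assuming bit_width <= 32):
 *
 *   mask = (u32)((1ULL << bit_width) - 1);
 *
 * The explicit loop above sidesteps the undefined behaviour a plain
 * 32-bit "1U << 32" shift would have when bit_width == 32.
 */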
3384 
3385 /**
3386  * cik_get_rb_disabled - computes the mask of disabled RBs
3387  *
3388  * @rdev: radeon_device pointer
3389  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3390  * @sh_per_se: number of SH blocks per SE for the asic
3392  *
3393  * Calculates the bitmask of disabled RBs (CIK).
3394  * Returns the disabled RB bitmask.
3395  */
3396 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3397 			      u32 max_rb_num_per_se,
3398 			      u32 sh_per_se)
3399 {
3400 	u32 data, mask;
3401 
3402 	data = RREG32(CC_RB_BACKEND_DISABLE);
3403 	if (data & 1)
3404 		data &= BACKEND_DISABLE_MASK;
3405 	else
3406 		data = 0;
3407 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3408 
3409 	data >>= BACKEND_DISABLE_SHIFT;
3410 
3411 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3412 
3413 	return data & mask;
3414 }
3415 
3416 /**
3417  * cik_setup_rb - setup the RBs on the asic
3418  *
3419  * @rdev: radeon_device pointer
3420  * @se_num: number of SEs (shader engines) for the asic
3421  * @sh_per_se: number of SH blocks per SE for the asic
3422  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3423  *
3424  * Configures per-SE/SH RB registers (CIK).
3425  */
3426 static void cik_setup_rb(struct radeon_device *rdev,
3427 			 u32 se_num, u32 sh_per_se,
3428 			 u32 max_rb_num_per_se)
3429 {
3430 	int i, j;
3431 	u32 data, mask;
3432 	u32 disabled_rbs = 0;
3433 	u32 enabled_rbs = 0;
3434 
3435 	mutex_lock(&rdev->grbm_idx_mutex);
3436 	for (i = 0; i < se_num; i++) {
3437 		for (j = 0; j < sh_per_se; j++) {
3438 			cik_select_se_sh(rdev, i, j);
3439 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3440 			if (rdev->family == CHIP_HAWAII)
3441 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3442 			else
3443 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3444 		}
3445 	}
3446 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3447 	mutex_unlock(&rdev->grbm_idx_mutex);
3448 
3449 	mask = 1;
3450 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3451 		if (!(disabled_rbs & mask))
3452 			enabled_rbs |= mask;
3453 		mask <<= 1;
3454 	}
3455 
3456 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3457 
3458 	mutex_lock(&rdev->grbm_idx_mutex);
3459 	for (i = 0; i < se_num; i++) {
3460 		cik_select_se_sh(rdev, i, 0xffffffff);
3461 		data = 0;
3462 		for (j = 0; j < sh_per_se; j++) {
3463 			switch (enabled_rbs & 3) {
3464 			case 0:
3465 				if (j == 0)
3466 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3467 				else
3468 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3469 				break;
3470 			case 1:
3471 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3472 				break;
3473 			case 2:
3474 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3475 				break;
3476 			case 3:
3477 			default:
3478 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3479 				break;
3480 			}
3481 			enabled_rbs >>= 2;
3482 		}
3483 		WREG32(PA_SC_RASTER_CONFIG, data);
3484 	}
3485 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3486 	mutex_unlock(&rdev->grbm_idx_mutex);
3487 }
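
/*
 * Worked example (a sketch): with se_num = 2, sh_per_se = 1 and
 * max_rb_num_per_se = 2 (a Bonaire-like layout), four RB bits are
 * scanned.  A disabled_rbs mask of 0x4 then yields enabled_rbs = 0xb,
 * i.e. three of the four render backends active.
 */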
3488 
3489 /**
3490  * cik_gpu_init - setup the 3D engine
3491  *
3492  * @rdev: radeon_device pointer
3493  *
3494  * Configures the 3D engine and tiling configuration
3495  * registers so that the 3D engine is usable.
3496  */
3497 static void cik_gpu_init(struct radeon_device *rdev)
3498 {
3499 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3500 	u32 mc_shared_chmap, mc_arb_ramcfg;
3501 	u32 hdp_host_path_cntl;
3502 	u32 tmp;
3503 	int i, j;
3504 
3505 	switch (rdev->family) {
3506 	case CHIP_BONAIRE:
3507 		rdev->config.cik.max_shader_engines = 2;
3508 		rdev->config.cik.max_tile_pipes = 4;
3509 		rdev->config.cik.max_cu_per_sh = 7;
3510 		rdev->config.cik.max_sh_per_se = 1;
3511 		rdev->config.cik.max_backends_per_se = 2;
3512 		rdev->config.cik.max_texture_channel_caches = 4;
3513 		rdev->config.cik.max_gprs = 256;
3514 		rdev->config.cik.max_gs_threads = 32;
3515 		rdev->config.cik.max_hw_contexts = 8;
3516 
3517 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3518 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3519 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3520 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3521 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3522 		break;
3523 	case CHIP_HAWAII:
3524 		rdev->config.cik.max_shader_engines = 4;
3525 		rdev->config.cik.max_tile_pipes = 16;
3526 		rdev->config.cik.max_cu_per_sh = 11;
3527 		rdev->config.cik.max_sh_per_se = 1;
3528 		rdev->config.cik.max_backends_per_se = 4;
3529 		rdev->config.cik.max_texture_channel_caches = 16;
3530 		rdev->config.cik.max_gprs = 256;
3531 		rdev->config.cik.max_gs_threads = 32;
3532 		rdev->config.cik.max_hw_contexts = 8;
3533 
3534 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3535 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3536 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3537 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3538 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3539 		break;
3540 	case CHIP_KAVERI:
3541 		rdev->config.cik.max_shader_engines = 1;
3542 		rdev->config.cik.max_tile_pipes = 4;
3543 		if ((rdev->pdev->device == 0x1304) ||
3544 		    (rdev->pdev->device == 0x1305) ||
3545 		    (rdev->pdev->device == 0x130C) ||
3546 		    (rdev->pdev->device == 0x130F) ||
3547 		    (rdev->pdev->device == 0x1310) ||
3548 		    (rdev->pdev->device == 0x1311) ||
3549 		    (rdev->pdev->device == 0x131C)) {
3550 			rdev->config.cik.max_cu_per_sh = 8;
3551 			rdev->config.cik.max_backends_per_se = 2;
3552 		} else if ((rdev->pdev->device == 0x1309) ||
3553 			   (rdev->pdev->device == 0x130A) ||
3554 			   (rdev->pdev->device == 0x130D) ||
3555 			   (rdev->pdev->device == 0x1313) ||
3556 			   (rdev->pdev->device == 0x131D)) {
3557 			rdev->config.cik.max_cu_per_sh = 6;
3558 			rdev->config.cik.max_backends_per_se = 2;
3559 		} else if ((rdev->pdev->device == 0x1306) ||
3560 			   (rdev->pdev->device == 0x1307) ||
3561 			   (rdev->pdev->device == 0x130B) ||
3562 			   (rdev->pdev->device == 0x130E) ||
3563 			   (rdev->pdev->device == 0x1315) ||
3564 			   (rdev->pdev->device == 0x1318) ||
3565 			   (rdev->pdev->device == 0x131B)) {
3566 			rdev->config.cik.max_cu_per_sh = 4;
3567 			rdev->config.cik.max_backends_per_se = 1;
3568 		} else {
3569 			rdev->config.cik.max_cu_per_sh = 3;
3570 			rdev->config.cik.max_backends_per_se = 1;
3571 		}
3572 		rdev->config.cik.max_sh_per_se = 1;
3573 		rdev->config.cik.max_texture_channel_caches = 4;
3574 		rdev->config.cik.max_gprs = 256;
3575 		rdev->config.cik.max_gs_threads = 16;
3576 		rdev->config.cik.max_hw_contexts = 8;
3577 
3578 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3579 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3580 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3581 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3582 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3583 		break;
3584 	case CHIP_KABINI:
3585 	case CHIP_MULLINS:
3586 	default:
3587 		rdev->config.cik.max_shader_engines = 1;
3588 		rdev->config.cik.max_tile_pipes = 2;
3589 		rdev->config.cik.max_cu_per_sh = 2;
3590 		rdev->config.cik.max_sh_per_se = 1;
3591 		rdev->config.cik.max_backends_per_se = 1;
3592 		rdev->config.cik.max_texture_channel_caches = 2;
3593 		rdev->config.cik.max_gprs = 256;
3594 		rdev->config.cik.max_gs_threads = 16;
3595 		rdev->config.cik.max_hw_contexts = 8;
3596 
3597 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3598 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3599 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3600 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3601 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3602 		break;
3603 	}
3604 
3605 	/* Initialize HDP */
3606 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3607 		WREG32((0x2c14 + j), 0x00000000);
3608 		WREG32((0x2c18 + j), 0x00000000);
3609 		WREG32((0x2c1c + j), 0x00000000);
3610 		WREG32((0x2c20 + j), 0x00000000);
3611 		WREG32((0x2c24 + j), 0x00000000);
3612 	}
3613 
3614 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3615 
3616 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3617 
3618 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3619 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3620 
3621 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3622 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3623 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3624 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3625 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3626 		rdev->config.cik.mem_row_size_in_kb = 4;
3627 	/* XXX use MC settings? */
3628 	rdev->config.cik.shader_engine_tile_size = 32;
3629 	rdev->config.cik.num_gpus = 1;
3630 	rdev->config.cik.multi_gpu_tile_size = 64;
3631 
3632 	/* fix up row size */
3633 	gb_addr_config &= ~ROW_SIZE_MASK;
3634 	switch (rdev->config.cik.mem_row_size_in_kb) {
3635 	case 1:
3636 	default:
3637 		gb_addr_config |= ROW_SIZE(0);
3638 		break;
3639 	case 2:
3640 		gb_addr_config |= ROW_SIZE(1);
3641 		break;
3642 	case 4:
3643 		gb_addr_config |= ROW_SIZE(2);
3644 		break;
3645 	}
3646 
3647 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3648 	 * not have bank info, so create a custom tiling dword.
3649 	 * bits 3:0   num_pipes
3650 	 * bits 7:4   num_banks
3651 	 * bits 11:8  group_size
3652 	 * bits 15:12 row_size
3653 	 */
3654 	rdev->config.cik.tile_config = 0;
3655 	switch (rdev->config.cik.num_tile_pipes) {
3656 	case 1:
3657 		rdev->config.cik.tile_config |= (0 << 0);
3658 		break;
3659 	case 2:
3660 		rdev->config.cik.tile_config |= (1 << 0);
3661 		break;
3662 	case 4:
3663 		rdev->config.cik.tile_config |= (2 << 0);
3664 		break;
3665 	case 8:
3666 	default:
3667 		/* XXX what about 12? */
3668 		rdev->config.cik.tile_config |= (3 << 0);
3669 		break;
3670 	}
3671 	rdev->config.cik.tile_config |=
3672 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3673 	rdev->config.cik.tile_config |=
3674 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3675 	rdev->config.cik.tile_config |=
3676 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
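	/*
	 * Userspace-style decode of the dword packed above (a sketch;
	 * the field positions follow the table in the comment):
	 *
	 *   num_pipes  = (tile_config >> 0)  & 0xf;
	 *   num_banks  = (tile_config >> 4)  & 0xf;
	 *   group_size = (tile_config >> 8)  & 0xf;
	 *   row_size   = (tile_config >> 12) & 0xf;
	 */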
3677 
3678 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3679 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3680 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3681 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3682 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3683 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3684 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3685 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3686 
3687 	cik_tiling_mode_table_init(rdev);
3688 
3689 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3690 		     rdev->config.cik.max_sh_per_se,
3691 		     rdev->config.cik.max_backends_per_se);
3692 
3693 	rdev->config.cik.active_cus = 0;
3694 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3695 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3696 			rdev->config.cik.active_cus +=
3697 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3698 		}
3699 	}
3700 
3701 	/* set HW defaults for 3D engine */
3702 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3703 
3704 	mutex_lock(&rdev->grbm_idx_mutex);
3705 	/*
3706 	 * make sure that the following register writes are broadcast
3707 	 * to all the shaders
3708 	 */
3709 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3710 	WREG32(SX_DEBUG_1, 0x20);
3711 
3712 	WREG32(TA_CNTL_AUX, 0x00010000);
3713 
3714 	tmp = RREG32(SPI_CONFIG_CNTL);
3715 	tmp |= 0x03000000;
3716 	WREG32(SPI_CONFIG_CNTL, tmp);
3717 
3718 	WREG32(SQ_CONFIG, 1);
3719 
3720 	WREG32(DB_DEBUG, 0);
3721 
3722 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3723 	tmp |= 0x00000400;
3724 	WREG32(DB_DEBUG2, tmp);
3725 
3726 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3727 	tmp |= 0x00020200;
3728 	WREG32(DB_DEBUG3, tmp);
3729 
3730 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3731 	tmp |= 0x00018208;
3732 	WREG32(CB_HW_CONTROL, tmp);
3733 
3734 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3735 
3736 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3737 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3738 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3739 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3740 
3741 	WREG32(VGT_NUM_INSTANCES, 1);
3742 
3743 	WREG32(CP_PERFMON_CNTL, 0);
3744 
3745 	WREG32(SQ_CONFIG, 0);
3746 
3747 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3748 					  FORCE_EOV_MAX_REZ_CNT(255)));
3749 
3750 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3751 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3752 
3753 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3754 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3755 
3756 	tmp = RREG32(HDP_MISC_CNTL);
3757 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3758 	WREG32(HDP_MISC_CNTL, tmp);
3759 
3760 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3761 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3762 
3763 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3764 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3765 	mutex_unlock(&rdev->grbm_idx_mutex);
3766 
3767 	udelay(50);
3768 }
3769 
3770 /*
3771  * GPU scratch register helper functions.
3772  */
3773 /**
3774  * cik_scratch_init - setup driver info for CP scratch regs
3775  *
3776  * @rdev: radeon_device pointer
3777  *
3778  * Set up the number and offset of the CP scratch registers.
3779  * NOTE: use of CP scratch registers is a legacy interface and
3780  * is not used by default on newer asics (r6xx+).  On newer asics,
3781  * memory buffers are used for fences rather than scratch regs.
3782  */
3783 static void cik_scratch_init(struct radeon_device *rdev)
3784 {
3785 	int i;
3786 
3787 	rdev->scratch.num_reg = 7;
3788 	rdev->scratch.reg_base = SCRATCH_REG0;
3789 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3790 		rdev->scratch.free[i] = true;
3791 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3792 	}
3793 }
3794 
3795 /**
3796  * cik_ring_test - basic gfx ring test
3797  *
3798  * @rdev: radeon_device pointer
3799  * @ring: radeon_ring structure holding ring information
3800  *
3801  * Allocate a scratch register and write to it using the gfx ring (CIK).
3802  * Provides a basic gfx ring test to verify that the ring is working.
3803  * Used by cik_cp_gfx_resume().
3804  * Returns 0 on success, error on failure.
3805  */
3806 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3807 {
3808 	uint32_t scratch;
3809 	uint32_t tmp = 0;
3810 	unsigned i;
3811 	int r;
3812 
3813 	r = radeon_scratch_get(rdev, &scratch);
3814 	if (r) {
3815 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3816 		return r;
3817 	}
3818 	WREG32(scratch, 0xCAFEDEAD);
3819 	r = radeon_ring_lock(rdev, ring, 3);
3820 	if (r) {
3821 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3822 		radeon_scratch_free(rdev, scratch);
3823 		return r;
3824 	}
3825 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3826 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3827 	radeon_ring_write(ring, 0xDEADBEEF);
3828 	radeon_ring_unlock_commit(rdev, ring, false);
3829 
3830 	for (i = 0; i < rdev->usec_timeout; i++) {
3831 		tmp = RREG32(scratch);
3832 		if (tmp == 0xDEADBEEF)
3833 			break;
3834 		DRM_UDELAY(1);
3835 	}
3836 	if (i < rdev->usec_timeout) {
3837 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3838 	} else {
3839 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3840 			  ring->idx, scratch, tmp);
3841 		r = -EINVAL;
3842 	}
3843 	radeon_scratch_free(rdev, scratch);
3844 	return r;
3845 }
3846 
3847 /**
3848  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3849  *
3850  * @rdev: radeon_device pointer
3851  * @ridx: radeon ring index
3852  *
3853  * Emits an hdp flush on the cp.
3854  */
3855 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3856 				       int ridx)
3857 {
3858 	struct radeon_ring *ring = &rdev->ring[ridx];
3859 	u32 ref_and_mask;
3860 
3861 	switch (ring->idx) {
3862 	case CAYMAN_RING_TYPE_CP1_INDEX:
3863 	case CAYMAN_RING_TYPE_CP2_INDEX:
3864 	default:
3865 		switch (ring->me) {
3866 		case 0:
3867 			ref_and_mask = CP2 << ring->pipe;
3868 			break;
3869 		case 1:
3870 			ref_and_mask = CP6 << ring->pipe;
3871 			break;
3872 		default:
3873 			return;
3874 		}
3875 		break;
3876 	case RADEON_RING_TYPE_GFX_INDEX:
3877 		ref_and_mask = CP0;
3878 		break;
3879 	}
3880 
3881 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3882 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3883 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3884 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3885 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3886 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3887 	radeon_ring_write(ring, ref_and_mask);
3888 	radeon_ring_write(ring, ref_and_mask);
3889 	radeon_ring_write(ring, 0x20); /* poll interval */
3890 }
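
/*
 * The seven dwords emitted above form one WAIT_REG_MEM packet
 * (a sketch of the layout):
 *
 *   dw0: PACKET3(PACKET3_WAIT_REG_MEM, 5) header
 *   dw1: operation/function/engine flags (write-wait-write, ==, PFP)
 *   dw2: GPU_HDP_FLUSH_REQ register dword offset (written)
 *   dw3: GPU_HDP_FLUSH_DONE register dword offset (polled)
 *   dw4: reference value (ref_and_mask)
 *   dw5: compare mask (ref_and_mask)
 *   dw6: poll interval (0x20)
 */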
3891 
3892 /**
3893  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3894  *
3895  * @rdev: radeon_device pointer
3896  * @fence: radeon fence object
3897  *
3898  * Emits a fence sequence number on the gfx ring and flushes
3899  * GPU caches.
3900  */
3901 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3902 			     struct radeon_fence *fence)
3903 {
3904 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3905 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3906 
3907 	/* EVENT_WRITE_EOP - flush caches, send int */
3908 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3909 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3910 				 EOP_TC_ACTION_EN |
3911 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3912 				 EVENT_INDEX(5)));
3913 	radeon_ring_write(ring, addr & 0xfffffffc);
3914 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3915 	radeon_ring_write(ring, fence->seq);
3916 	radeon_ring_write(ring, 0);
3917 }
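
/*
 * Six dwords are emitted above (a sketch of the layout): the packet
 * header, the cache-flush/event flags, fence address bits 31:2, the
 * upper address bits combined with the DATA_SEL/INT_SEL write and
 * interrupt selects, the fence sequence number, and a trailing zero.
 */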
3918 
3919 /**
3920  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3921  *
3922  * @rdev: radeon_device pointer
3923  * @fence: radeon fence object
3924  *
3925  * Emits a fence sequence number on the compute ring and flushes
3926  * GPU caches.
3927  */
3928 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3929 				 struct radeon_fence *fence)
3930 {
3931 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3932 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3933 
3934 	/* RELEASE_MEM - flush caches, send int */
3935 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3936 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3937 				 EOP_TC_ACTION_EN |
3938 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3939 				 EVENT_INDEX(5)));
3940 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3941 	radeon_ring_write(ring, addr & 0xfffffffc);
3942 	radeon_ring_write(ring, upper_32_bits(addr));
3943 	radeon_ring_write(ring, fence->seq);
3944 	radeon_ring_write(ring, 0);
3945 }
3946 
3947 /**
3948  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3949  *
3950  * @rdev: radeon_device pointer
3951  * @ring: radeon ring buffer object
3952  * @semaphore: radeon semaphore object
3953  * @emit_wait: Is this a semaphore wait?
3954  *
3955  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3956  * from running ahead of semaphore waits.
3957  */
3958 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3959 			     struct radeon_ring *ring,
3960 			     struct radeon_semaphore *semaphore,
3961 			     bool emit_wait)
3962 {
3963 	uint64_t addr = semaphore->gpu_addr;
3964 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3965 
3966 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3967 	radeon_ring_write(ring, lower_32_bits(addr));
3968 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3969 
3970 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3971 		/* Prevent the PFP from running ahead of the semaphore wait */
3972 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3973 		radeon_ring_write(ring, 0x0);
3974 	}
3975 
3976 	return true;
3977 }
3978 
3979 /**
3980  * cik_copy_cpdma - copy pages using the CP DMA engine
3981  *
3982  * @rdev: radeon_device pointer
3983  * @src_offset: src GPU address
3984  * @dst_offset: dst GPU address
3985  * @num_gpu_pages: number of GPU pages to xfer
3986  * @resv: reservation object to sync to
3987  *
3988  * Copy GPU pages using the CP DMA engine (CIK+).
3989  * Used by the radeon ttm implementation to move pages if
3990  * registered as the asic copy callback.
3991  */
3992 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3993 				    uint64_t src_offset, uint64_t dst_offset,
3994 				    unsigned num_gpu_pages,
3995 				    struct reservation_object *resv)
3996 {
3997 	struct radeon_fence *fence;
3998 	struct radeon_sync sync;
3999 	int ring_index = rdev->asic->copy.blit_ring_index;
4000 	struct radeon_ring *ring = &rdev->ring[ring_index];
4001 	u32 size_in_bytes, cur_size_in_bytes, control;
4002 	int i, num_loops;
4003 	int r = 0;
4004 
4005 	radeon_sync_create(&sync);
4006 
4007 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4008 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4009 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4010 	if (r) {
4011 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4012 		radeon_sync_free(rdev, &sync, NULL);
4013 		return ERR_PTR(r);
4014 	}
4015 
4016 	radeon_sync_resv(rdev, &sync, resv, false);
4017 	radeon_sync_rings(rdev, &sync, ring->idx);
4018 
4019 	for (i = 0; i < num_loops; i++) {
4020 		cur_size_in_bytes = size_in_bytes;
4021 		if (cur_size_in_bytes > 0x1fffff)
4022 			cur_size_in_bytes = 0x1fffff;
4023 		size_in_bytes -= cur_size_in_bytes;
4024 		control = 0;
4025 		if (size_in_bytes == 0)
4026 			control |= PACKET3_DMA_DATA_CP_SYNC;
4027 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4028 		radeon_ring_write(ring, control);
4029 		radeon_ring_write(ring, lower_32_bits(src_offset));
4030 		radeon_ring_write(ring, upper_32_bits(src_offset));
4031 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4032 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4033 		radeon_ring_write(ring, cur_size_in_bytes);
4034 		src_offset += cur_size_in_bytes;
4035 		dst_offset += cur_size_in_bytes;
4036 	}
4037 
4038 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4039 	if (r) {
4040 		radeon_ring_unlock_undo(rdev, ring);
4041 		radeon_sync_free(rdev, &sync, NULL);
4042 		return ERR_PTR(r);
4043 	}
4044 
4045 	radeon_ring_unlock_commit(rdev, ring, false);
4046 	radeon_sync_free(rdev, &sync, fence);
4047 
4048 	return fence;
4049 }
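
/*
 * Chunking example (a sketch): copying 2048 GPU pages (0x800000 bytes)
 * against the 0x1fffff-byte per-packet limit gives
 * DIV_ROUND_UP(0x800000, 0x1fffff) = 5 DMA_DATA packets, so the ring
 * is locked for 5 * 7 + 18 = 53 dwords.
 */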
4050 
4051 /*
4052  * IB stuff
4053  */
4054 /**
4055  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4056  *
4057  * @rdev: radeon_device pointer
4058  * @ib: radeon indirect buffer object
4059  *
4060  * Emits a DE (drawing engine) or CE (constant engine) IB
4061  * on the gfx ring.  IBs are usually generated by userspace
4062  * acceleration drivers and submitted to the kernel for
4063  * scheduling on the ring.  This function schedules the IB
4064  * on the gfx ring for execution by the GPU.
4065  */
4066 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4067 {
4068 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4069 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4070 	u32 header, control = INDIRECT_BUFFER_VALID;
4071 
4072 	if (ib->is_const_ib) {
4073 		/* set switch buffer packet before const IB */
4074 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4075 		radeon_ring_write(ring, 0);
4076 
4077 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4078 	} else {
4079 		u32 next_rptr;
4080 		if (ring->rptr_save_reg) {
4081 			next_rptr = ring->wptr + 3 + 4;
4082 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4083 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4084 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4085 			radeon_ring_write(ring, next_rptr);
4086 		} else if (rdev->wb.enabled) {
4087 			next_rptr = ring->wptr + 5 + 4;
4088 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4089 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4090 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4091 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4092 			radeon_ring_write(ring, next_rptr);
4093 		}
4094 
4095 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4096 	}
4097 
4098 	control |= ib->length_dw | (vm_id << 24);
4099 
4100 	radeon_ring_write(ring, header);
4101 	radeon_ring_write(ring,
4102 #ifdef __BIG_ENDIAN
4103 			  (2 << 0) |
4104 #endif
4105 			  (ib->gpu_addr & 0xFFFFFFFC));
4106 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4107 	radeon_ring_write(ring, control);
4108 }
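
/*
 * Control dword example (a sketch): for the 3-dword IB used by
 * cik_ib_test() below with no VM bound (vm_id == 0), the value is
 * simply INDIRECT_BUFFER_VALID | 3.
 */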
4109 
4110 /**
4111  * cik_ib_test - basic gfx ring IB test
4112  *
4113  * @rdev: radeon_device pointer
4114  * @ring: radeon_ring structure holding ring information
4115  *
4116  * Allocate an IB and execute it on the gfx ring (CIK).
4117  * Provides a basic gfx ring test to verify that IBs are working.
4118  * Returns 0 on success, error on failure.
4119  */
4120 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4121 {
4122 	struct radeon_ib ib;
4123 	uint32_t scratch;
4124 	uint32_t tmp = 0;
4125 	unsigned i;
4126 	int r;
4127 
4128 	r = radeon_scratch_get(rdev, &scratch);
4129 	if (r) {
4130 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4131 		return r;
4132 	}
4133 	WREG32(scratch, 0xCAFEDEAD);
4134 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4135 	if (r) {
4136 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4137 		radeon_scratch_free(rdev, scratch);
4138 		return r;
4139 	}
4140 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4141 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4142 	ib.ptr[2] = 0xDEADBEEF;
4143 	ib.length_dw = 3;
4144 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4145 	if (r) {
4146 		radeon_scratch_free(rdev, scratch);
4147 		radeon_ib_free(rdev, &ib);
4148 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4149 		return r;
4150 	}
4151 	r = radeon_fence_wait(ib.fence, false);
4152 	if (r) {
4153 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4154 		radeon_scratch_free(rdev, scratch);
4155 		radeon_ib_free(rdev, &ib);
4156 		return r;
4157 	}
4158 	for (i = 0; i < rdev->usec_timeout; i++) {
4159 		tmp = RREG32(scratch);
4160 		if (tmp == 0xDEADBEEF)
4161 			break;
4162 		DRM_UDELAY(1);
4163 	}
4164 	if (i < rdev->usec_timeout) {
4165 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4166 	} else {
4167 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4168 			  scratch, tmp);
4169 		r = -EINVAL;
4170 	}
4171 	radeon_scratch_free(rdev, scratch);
4172 	radeon_ib_free(rdev, &ib);
4173 	return r;
4174 }
4175 
4176 /*
4177  * CP.
4178  * On CIK, gfx and compute now have independent command processors.
4179  *
4180  * GFX
4181  * Gfx consists of a single ring and can process both gfx jobs and
4182  * compute jobs.  The gfx CP consists of three microengines (ME):
4183  * PFP - Pre-Fetch Parser
4184  * ME - Micro Engine
4185  * CE - Constant Engine
4186  * The PFP and ME make up what is considered the Drawing Engine (DE).
4187  * The CE is an asynchronous engine used for updating buffer descriptors
4188  * used by the DE so that they can be loaded into cache in parallel
4189  * while the DE is processing state update packets.
4190  *
4191  * Compute
4192  * The compute CP consists of two microengines (ME):
4193  * MEC1 - Compute MicroEngine 1
4194  * MEC2 - Compute MicroEngine 2
4195  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4196  * The queues are exposed to userspace and are programmed directly
4197  * by the compute runtime.
4198  */
4199 /**
4200  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4201  *
4202  * @rdev: radeon_device pointer
4203  * @enable: enable or disable the MEs
4204  *
4205  * Halts or unhalts the gfx MEs.
4206  */
4207 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4208 {
4209 	if (enable) {
4210 		WREG32(CP_ME_CNTL, 0);
4211 	} else {
4212 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4213 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4214 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4215 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4216 	}
4217 	udelay(50);
4218 }
4219 
4220 /**
4221  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4222  *
4223  * @rdev: radeon_device pointer
4224  *
4225  * Loads the gfx PFP, ME, and CE ucode.
4226  * Returns 0 for success, -EINVAL if the ucode is not available.
4227  */
4228 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4229 {
4230 	int i;
4231 
4232 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4233 		return -EINVAL;
4234 
4235 	cik_cp_gfx_enable(rdev, false);
4236 
4237 	if (rdev->new_fw) {
4238 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4239 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4240 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4241 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4242 		const struct gfx_firmware_header_v1_0 *me_hdr =
4243 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4244 		const __le32 *fw_data;
4245 		u32 fw_size;
4246 
4247 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4248 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4249 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4250 
4251 		/* PFP */
4252 		fw_data = (const __le32 *)
4253 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4254 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4255 		WREG32(CP_PFP_UCODE_ADDR, 0);
4256 		for (i = 0; i < fw_size; i++)
4257 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4258 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4259 
4260 		/* CE */
4261 		fw_data = (const __le32 *)
4262 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4263 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4264 		WREG32(CP_CE_UCODE_ADDR, 0);
4265 		for (i = 0; i < fw_size; i++)
4266 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4267 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4268 
4269 		/* ME */
4270 		fw_data = (const __le32 *)
4271 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4272 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4273 		WREG32(CP_ME_RAM_WADDR, 0);
4274 		for (i = 0; i < fw_size; i++)
4275 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4276 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4277 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4278 	} else {
4279 		const __be32 *fw_data;
4280 
4281 		/* PFP */
4282 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4283 		WREG32(CP_PFP_UCODE_ADDR, 0);
4284 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4285 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4286 		WREG32(CP_PFP_UCODE_ADDR, 0);
4287 
4288 		/* CE */
4289 		fw_data = (const __be32 *)rdev->ce_fw->data;
4290 		WREG32(CP_CE_UCODE_ADDR, 0);
4291 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4292 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4293 		WREG32(CP_CE_UCODE_ADDR, 0);
4294 
4295 		/* ME */
4296 		fw_data = (const __be32 *)rdev->me_fw->data;
4297 		WREG32(CP_ME_RAM_WADDR, 0);
4298 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4299 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4300 		WREG32(CP_ME_RAM_WADDR, 0);
4301 	}
4302 
4303 	return 0;
4304 }
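
/*
 * Both branches above follow the same ADDR/DATA upload pattern; a
 * hypothetical helper (a sketch only, not part of the driver; the
 * trailing ucode-version write stays with the caller) would look like:
 *
 *   static void cik_write_ucode(struct radeon_device *rdev,
 *                               u32 addr_reg, u32 data_reg,
 *                               const __le32 *data, u32 dw_count)
 *   {
 *           u32 i;
 *
 *           WREG32(addr_reg, 0);
 *           for (i = 0; i < dw_count; i++)
 *                   WREG32(data_reg, le32_to_cpup(data++));
 *   }
 */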
4305 
4306 /**
4307  * cik_cp_gfx_start - start the gfx ring
4308  *
4309  * @rdev: radeon_device pointer
4310  *
4311  * Enables the ring and loads the clear state context and other
4312  * packets required to init the ring.
4313  * Returns 0 for success, error for failure.
4314  */
4315 static int cik_cp_gfx_start(struct radeon_device *rdev)
4316 {
4317 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4318 	int r, i;
4319 
4320 	/* init the CP */
4321 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4322 	WREG32(CP_ENDIAN_SWAP, 0);
4323 	WREG32(CP_DEVICE_ID, 1);
4324 
4325 	cik_cp_gfx_enable(rdev, true);
4326 
4327 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4328 	if (r) {
4329 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4330 		return r;
4331 	}
4332 
4333 	/* init the CE partitions.  CE only used for gfx on CIK */
4334 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4335 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4336 	radeon_ring_write(ring, 0x8000);
4337 	radeon_ring_write(ring, 0x8000);
4338 
4339 	/* setup clear context state */
4340 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4341 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4342 
4343 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4344 	radeon_ring_write(ring, 0x80000000);
4345 	radeon_ring_write(ring, 0x80000000);
4346 
4347 	for (i = 0; i < cik_default_size; i++)
4348 		radeon_ring_write(ring, cik_default_state[i]);
4349 
4350 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4351 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4352 
4353 	/* set clear context state */
4354 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4355 	radeon_ring_write(ring, 0);
4356 
4357 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4358 	radeon_ring_write(ring, 0x00000316);
4359 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4360 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4361 
4362 	radeon_ring_unlock_commit(rdev, ring, false);
4363 
4364 	return 0;
4365 }
4366 
4367 /**
4368  * cik_cp_gfx_fini - stop the gfx ring
4369  *
4370  * @rdev: radeon_device pointer
4371  *
4372  * Stop the gfx ring and tear down the driver ring
4373  * info.
4374  */
4375 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4376 {
4377 	cik_cp_gfx_enable(rdev, false);
4378 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4379 }
4380 
4381 /**
4382  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4383  *
4384  * @rdev: radeon_device pointer
4385  *
4386  * Program the location and size of the gfx ring buffer
4387  * and test it to make sure it's working.
4388  * Returns 0 for success, error for failure.
4389  */
4390 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4391 {
4392 	struct radeon_ring *ring;
4393 	u32 tmp;
4394 	u32 rb_bufsz;
4395 	u64 rb_addr;
4396 	int r;
4397 
4398 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4399 	if (rdev->family != CHIP_HAWAII)
4400 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4401 
4402 	/* Set the write pointer delay */
4403 	WREG32(CP_RB_WPTR_DELAY, 0);
4404 
4405 	/* set the RB to use vmid 0 */
4406 	WREG32(CP_RB_VMID, 0);
4407 
4408 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4409 
4410 	/* ring 0 - compute and gfx */
4411 	/* Set ring buffer size */
4412 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4413 	rb_bufsz = order_base_2(ring->ring_size / 8);
4414 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
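	/*
	 * Example (a sketch): a 1 MiB ring gives
	 * rb_bufsz = order_base_2(0x100000 / 8) = 17, and 4 KiB GPU pages
	 * give an rptr block size of order_base_2(4096 / 8) = 9, so
	 * tmp = (9 << 8) | 17 = 0x911.
	 */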
4415 #ifdef __BIG_ENDIAN
4416 	tmp |= BUF_SWAP_32BIT;
4417 #endif
4418 	WREG32(CP_RB0_CNTL, tmp);
4419 
4420 	/* Initialize the ring buffer's read and write pointers */
4421 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4422 	ring->wptr = 0;
4423 	WREG32(CP_RB0_WPTR, ring->wptr);
4424 
4425 	/* set the wb address whether it's enabled or not */
4426 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4427 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4428 
4429 	/* scratch register shadowing is no longer supported */
4430 	WREG32(SCRATCH_UMSK, 0);
4431 
4432 	if (!rdev->wb.enabled)
4433 		tmp |= RB_NO_UPDATE;
4434 
4435 	mdelay(1);
4436 	WREG32(CP_RB0_CNTL, tmp);
4437 
4438 	rb_addr = ring->gpu_addr >> 8;
4439 	WREG32(CP_RB0_BASE, rb_addr);
4440 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4441 
4442 	/* start the ring */
4443 	cik_cp_gfx_start(rdev);
4444 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4445 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4446 	if (r) {
4447 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4448 		return r;
4449 	}
4450 
4451 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4452 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4453 
4454 	return 0;
4455 }
4456 
4457 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4458 		     struct radeon_ring *ring)
4459 {
4460 	u32 rptr;
4461 
4462 	if (rdev->wb.enabled)
4463 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4464 	else
4465 		rptr = RREG32(CP_RB0_RPTR);
4466 
4467 	return rptr;
4468 }
4469 
4470 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4471 		     struct radeon_ring *ring)
4472 {
4473 	u32 wptr;
4474 
4475 	wptr = RREG32(CP_RB0_WPTR);
4476 
4477 	return wptr;
4478 }
4479 
4480 void cik_gfx_set_wptr(struct radeon_device *rdev,
4481 		      struct radeon_ring *ring)
4482 {
4483 	WREG32(CP_RB0_WPTR, ring->wptr);
4484 	(void)RREG32(CP_RB0_WPTR);
4485 }
4486 
4487 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4488 			 struct radeon_ring *ring)
4489 {
4490 	u32 rptr;
4491 
4492 	if (rdev->wb.enabled) {
4493 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4494 	} else {
4495 		mutex_lock(&rdev->srbm_mutex);
4496 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4497 		rptr = RREG32(CP_HQD_PQ_RPTR);
4498 		cik_srbm_select(rdev, 0, 0, 0, 0);
4499 		mutex_unlock(&rdev->srbm_mutex);
4500 	}
4501 
4502 	return rptr;
4503 }
4504 
4505 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4506 			 struct radeon_ring *ring)
4507 {
4508 	u32 wptr;
4509 
4510 	if (rdev->wb.enabled) {
4511 		/* XXX check if swapping is necessary on BE */
4512 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4513 	} else {
4514 		mutex_lock(&rdev->srbm_mutex);
4515 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4516 		wptr = RREG32(CP_HQD_PQ_WPTR);
4517 		cik_srbm_select(rdev, 0, 0, 0, 0);
4518 		mutex_unlock(&rdev->srbm_mutex);
4519 	}
4520 
4521 	return wptr;
4522 }
4523 
4524 void cik_compute_set_wptr(struct radeon_device *rdev,
4525 			  struct radeon_ring *ring)
4526 {
4527 	/* XXX check if swapping is necessary on BE */
4528 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4529 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4530 }
4531 
4532 /**
4533  * cik_cp_compute_enable - enable/disable the compute CP MEs
4534  *
4535  * @rdev: radeon_device pointer
4536  * @enable: enable or disable the MEs
4537  *
4538  * Halts or unhalts the compute MEs.
4539  */
4540 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4541 {
4542 	if (enable) {
4543 		WREG32(CP_MEC_CNTL, 0);
4544 	} else {
4545 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4546 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4547 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4548 	}
4549 	udelay(50);
4550 }
4551 
4552 /**
4553  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4554  *
4555  * @rdev: radeon_device pointer
4556  *
4557  * Loads the compute MEC1&2 ucode.
4558  * Returns 0 for success, -EINVAL if the ucode is not available.
4559  */
4560 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4561 {
4562 	int i;
4563 
4564 	if (!rdev->mec_fw)
4565 		return -EINVAL;
4566 
4567 	cik_cp_compute_enable(rdev, false);
4568 
4569 	if (rdev->new_fw) {
4570 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4571 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4572 		const __le32 *fw_data;
4573 		u32 fw_size;
4574 
4575 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4576 
4577 		/* MEC1 */
4578 		fw_data = (const __le32 *)
4579 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4580 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4581 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4582 		for (i = 0; i < fw_size; i++)
4583 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4584 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4585 
4586 		/* MEC2 */
4587 		if (rdev->family == CHIP_KAVERI) {
4588 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4589 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4590 
4591 			fw_data = (const __le32 *)
4592 				(rdev->mec2_fw->data +
4593 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4594 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4595 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4596 			for (i = 0; i < fw_size; i++)
4597 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4598 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4599 		}
4600 	} else {
4601 		const __be32 *fw_data;
4602 
4603 		/* MEC1 */
4604 		fw_data = (const __be32 *)rdev->mec_fw->data;
4605 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4606 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4607 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4608 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4609 
4610 		if (rdev->family == CHIP_KAVERI) {
4611 			/* MEC2 */
4612 			fw_data = (const __be32 *)rdev->mec_fw->data;
4613 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4614 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4615 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4616 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4617 		}
4618 	}
4619 
4620 	return 0;
4621 }
4622 
4623 /**
4624  * cik_cp_compute_start - start the compute queues
4625  *
4626  * @rdev: radeon_device pointer
4627  *
4628  * Enable the compute queues.
4629  * Returns 0 for success, error for failure.
4630  */
4631 static int cik_cp_compute_start(struct radeon_device *rdev)
4632 {
4633 	cik_cp_compute_enable(rdev, true);
4634 
4635 	return 0;
4636 }
4637 
4638 /**
4639  * cik_cp_compute_fini - stop the compute queues
4640  *
4641  * @rdev: radeon_device pointer
4642  *
4643  * Stop the compute queues and tear down the driver queue
4644  * info.
4645  */
4646 static void cik_cp_compute_fini(struct radeon_device *rdev)
4647 {
4648 	int i, idx, r;
4649 
4650 	cik_cp_compute_enable(rdev, false);
4651 
4652 	for (i = 0; i < 2; i++) {
4653 		if (i == 0)
4654 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4655 		else
4656 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4657 
4658 		if (rdev->ring[idx].mqd_obj) {
4659 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4660 			if (unlikely(r != 0))
4661 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4662 
4663 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4664 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4665 
4666 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4667 			rdev->ring[idx].mqd_obj = NULL;
4668 		}
4669 	}
4670 }
4671 
4672 static void cik_mec_fini(struct radeon_device *rdev)
4673 {
4674 	int r;
4675 
4676 	if (rdev->mec.hpd_eop_obj) {
4677 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4678 		if (unlikely(r != 0))
4679 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4680 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4681 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4682 
4683 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4684 		rdev->mec.hpd_eop_obj = NULL;
4685 	}
4686 }
4687 
4688 #define MEC_HPD_SIZE 2048
4689 
4690 static int cik_mec_init(struct radeon_device *rdev)
4691 {
4692 	int r;
4693 	u32 *hpd;
4694 
4695 	/*
4696 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4697 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4698 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4699 	 * be handled by KFD
4700 	 */
4701 	rdev->mec.num_mec = 1;
4702 	rdev->mec.num_pipe = 1;
4703 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4704 
4705 	if (rdev->mec.hpd_eop_obj == NULL) {
4706 		r = radeon_bo_create(rdev,
4707 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4708 				     PAGE_SIZE, true,
4709 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4710 				     &rdev->mec.hpd_eop_obj);
4711 		if (r) {
4712 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4713 			return r;
4714 		}
4715 	}
4716 
4717 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4718 	if (unlikely(r != 0)) {
4719 		cik_mec_fini(rdev);
4720 		return r;
4721 	}
4722 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4723 			  &rdev->mec.hpd_eop_gpu_addr);
4724 	if (r) {
4725 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4726 		cik_mec_fini(rdev);
4727 		return r;
4728 	}
4729 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4730 	if (r) {
4731 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4732 		cik_mec_fini(rdev);
4733 		return r;
4734 	}
4735 
4736 	/* clear memory.  Not sure if this is required or not */
4737 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4738 
4739 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4740 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4741 
4742 	return 0;
4743 }
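
/*
 * Sizing example (a sketch): with num_mec = num_pipe = 1 as configured
 * above, the HPD EOP buffer is 1 * 1 * MEC_HPD_SIZE * 2 = 4096 bytes,
 * i.e. exactly one page.
 */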
4744 
4745 struct hqd_registers {
4747 	u32 cp_mqd_base_addr;
4748 	u32 cp_mqd_base_addr_hi;
4749 	u32 cp_hqd_active;
4750 	u32 cp_hqd_vmid;
4751 	u32 cp_hqd_persistent_state;
4752 	u32 cp_hqd_pipe_priority;
4753 	u32 cp_hqd_queue_priority;
4754 	u32 cp_hqd_quantum;
4755 	u32 cp_hqd_pq_base;
4756 	u32 cp_hqd_pq_base_hi;
4757 	u32 cp_hqd_pq_rptr;
4758 	u32 cp_hqd_pq_rptr_report_addr;
4759 	u32 cp_hqd_pq_rptr_report_addr_hi;
4760 	u32 cp_hqd_pq_wptr_poll_addr;
4761 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4762 	u32 cp_hqd_pq_doorbell_control;
4763 	u32 cp_hqd_pq_wptr;
4764 	u32 cp_hqd_pq_control;
4765 	u32 cp_hqd_ib_base_addr;
4766 	u32 cp_hqd_ib_base_addr_hi;
4767 	u32 cp_hqd_ib_rptr;
4768 	u32 cp_hqd_ib_control;
4769 	u32 cp_hqd_iq_timer;
4770 	u32 cp_hqd_iq_rptr;
4771 	u32 cp_hqd_dequeue_request;
4772 	u32 cp_hqd_dma_offload;
4773 	u32 cp_hqd_sema_cmd;
4774 	u32 cp_hqd_msg_type;
4775 	u32 cp_hqd_atomic0_preop_lo;
4776 	u32 cp_hqd_atomic0_preop_hi;
4777 	u32 cp_hqd_atomic1_preop_lo;
4778 	u32 cp_hqd_atomic1_preop_hi;
4779 	u32 cp_hqd_hq_scheduler0;
4780 	u32 cp_hqd_hq_scheduler1;
4781 	u32 cp_mqd_control;
4782 };
4783 
4784 struct bonaire_mqd {
4786 	u32 header;
4787 	u32 dispatch_initiator;
4788 	u32 dimensions[3];
4789 	u32 start_idx[3];
4790 	u32 num_threads[3];
4791 	u32 pipeline_stat_enable;
4792 	u32 perf_counter_enable;
4793 	u32 pgm[2];
4794 	u32 tba[2];
4795 	u32 tma[2];
4796 	u32 pgm_rsrc[2];
4797 	u32 vmid;
4798 	u32 resource_limits;
4799 	u32 static_thread_mgmt01[2];
4800 	u32 tmp_ring_size;
4801 	u32 static_thread_mgmt23[2];
4802 	u32 restart[3];
4803 	u32 thread_trace_enable;
4804 	u32 reserved1;
4805 	u32 user_data[16];
4806 	u32 vgtcs_invoke_count[2];
4807 	struct hqd_registers queue_state;
4808 	u32 dequeue_cntr;
4809 	u32 interrupt_queue[64];
4810 };
4811 
4812 /**
4813  * cik_cp_compute_resume - setup the compute queue registers
4814  *
4815  * @rdev: radeon_device pointer
4816  *
4817  * Program the compute queues and test them to make sure they
4818  * are working.
4819  * Returns 0 for success, error for failure.
4820  */
4821 static int cik_cp_compute_resume(struct radeon_device *rdev)
4822 {
4823 	int r, i, j, idx;
4824 	u32 tmp;
4825 	bool use_doorbell = true;
4826 	u64 hqd_gpu_addr;
4827 	u64 mqd_gpu_addr;
4828 	u64 eop_gpu_addr;
4829 	u64 wb_gpu_addr;
4830 	u32 *buf;
4831 	struct bonaire_mqd *mqd;
4832 
4833 	r = cik_cp_compute_start(rdev);
4834 	if (r)
4835 		return r;
4836 
4837 	/* fix up chicken bits */
4838 	tmp = RREG32(CP_CPF_DEBUG);
4839 	tmp |= (1 << 23);
4840 	WREG32(CP_CPF_DEBUG, tmp);
4841 
4842 	/* init the pipes */
4843 	mutex_lock(&rdev->srbm_mutex);
4844 
4845 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4846 
4847 	cik_srbm_select(rdev, 0, 0, 0, 0);
4848 
4849 	/* write the EOP addr */
4850 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4851 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4852 
4853 	/* set the VMID assigned */
4854 	WREG32(CP_HPD_EOP_VMID, 0);
4855 
4856 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4857 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4858 	tmp &= ~EOP_SIZE_MASK;
4859 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4860 	WREG32(CP_HPD_EOP_CONTROL, tmp);
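	/*
	 * Example (a sketch): MEC_HPD_SIZE / 8 = 256, so the field is
	 * order_base_2(256) = 8 and the hardware sees an EOP size of
	 * 2^(8+1) = 512 dwords, i.e. the full 2 KiB MEC_HPD_SIZE.
	 */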
4861 
4862 	mutex_unlock(&rdev->srbm_mutex);
4863 
4864 	/* init the queues.  Just two for now. */
4865 	for (i = 0; i < 2; i++) {
4866 		if (i == 0)
4867 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4868 		else
4869 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4870 
4871 		if (rdev->ring[idx].mqd_obj == NULL) {
4872 			r = radeon_bo_create(rdev,
4873 					     sizeof(struct bonaire_mqd),
4874 					     PAGE_SIZE, true,
4875 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4876 					     NULL, &rdev->ring[idx].mqd_obj);
4877 			if (r) {
4878 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4879 				return r;
4880 			}
4881 		}
4882 
4883 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4884 		if (unlikely(r != 0)) {
4885 			cik_cp_compute_fini(rdev);
4886 			return r;
4887 		}
4888 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4889 				  &mqd_gpu_addr);
4890 		if (r) {
4891 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4892 			cik_cp_compute_fini(rdev);
4893 			return r;
4894 		}
4895 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4896 		if (r) {
4897 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4898 			cik_cp_compute_fini(rdev);
4899 			return r;
4900 		}
4901 
4902 		/* init the mqd struct */
4903 		memset(buf, 0, sizeof(struct bonaire_mqd));
4904 
4905 		mqd = (struct bonaire_mqd *)buf;
4906 		mqd->header = 0xC0310800;
4907 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4908 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4909 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4910 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4911 
4912 		mutex_lock(&rdev->srbm_mutex);
4913 		cik_srbm_select(rdev, rdev->ring[idx].me,
4914 				rdev->ring[idx].pipe,
4915 				rdev->ring[idx].queue, 0);
4916 
4917 		/* disable wptr polling */
4918 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4919 		tmp &= ~WPTR_POLL_EN;
4920 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4921 
4922 		/* enable doorbell? */
4923 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4924 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4925 		if (use_doorbell)
4926 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4927 		else
4928 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4929 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4930 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4931 
4932 		/* disable the queue if it's active */
4933 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4934 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4935 	mqd->queue_state.cp_hqd_pq_wptr = 0;
4936 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4937 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4938 			for (j = 0; j < rdev->usec_timeout; j++) {
4939 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4940 					break;
4941 				udelay(1);
4942 			}
4943 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4944 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4945 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4946 		}
4947 
4948 		/* set the pointer to the MQD */
4949 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4950 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4951 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4952 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4953 		/* set MQD vmid to 0 */
4954 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4955 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4956 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4957 
4958 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4959 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4960 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4961 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4962 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4963 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4964 
4965 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4966 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4967 		mqd->queue_state.cp_hqd_pq_control &=
4968 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4969 
4970 		mqd->queue_state.cp_hqd_pq_control |=
4971 			order_base_2(rdev->ring[idx].ring_size / 8);
4972 		mqd->queue_state.cp_hqd_pq_control |=
4973 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4974 #ifdef __BIG_ENDIAN
4975 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4976 #endif
4977 		mqd->queue_state.cp_hqd_pq_control &=
4978 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4979 		mqd->queue_state.cp_hqd_pq_control |=
4980 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4981 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4982 
4983 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4984 		if (i == 0)
4985 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4986 		else
4987 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4988 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4989 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4990 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4991 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4992 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4993 
4994 	/* set the wb address whether it's enabled or not */
4995 		if (i == 0)
4996 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4997 		else
4998 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4999 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5000 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5001 			upper_32_bits(wb_gpu_addr) & 0xffff;
5002 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5003 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5004 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5005 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5006 
5007 		/* enable the doorbell if requested */
5008 		if (use_doorbell) {
5009 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5010 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5011 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5012 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5013 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5014 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5015 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5016 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5017 
5018 		} else {
5019 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5020 		}
5021 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5022 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5023 
5024 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5025 		rdev->ring[idx].wptr = 0;
5026 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5027 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5028 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5029 
5030 		/* set the vmid for the queue */
5031 		mqd->queue_state.cp_hqd_vmid = 0;
5032 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5033 
5034 		/* activate the queue */
5035 		mqd->queue_state.cp_hqd_active = 1;
5036 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5037 
5038 		cik_srbm_select(rdev, 0, 0, 0, 0);
5039 		mutex_unlock(&rdev->srbm_mutex);
5040 
5041 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5042 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5043 
5044 		rdev->ring[idx].ready = true;
5045 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5046 		if (r)
5047 			rdev->ring[idx].ready = false;
5048 	}
5049 
5050 	return 0;
5051 }
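
/*
 * Editor's sketch (hypothetical helper, not part of the driver): the
 * queue-size fields programmed above (EOP_SIZE in CP_HPD_EOP_CONTROL,
 * QUEUE_SIZE in CP_HQD_PQ_CONTROL) are interpreted by the hardware as
 * 2^(field+1) dwords, i.e. 2^(field+3) bytes, hence
 * field = log2(bytes) - 3 = order_base_2(bytes / 8).
 */
static inline u32 cik_queue_size_field(u32 size_in_bytes)
{
	return order_base_2(size_in_bytes / 8);
}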
5052 
5053 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5054 {
5055 	cik_cp_gfx_enable(rdev, enable);
5056 	cik_cp_compute_enable(rdev, enable);
5057 }
5058 
5059 static int cik_cp_load_microcode(struct radeon_device *rdev)
5060 {
5061 	int r;
5062 
5063 	r = cik_cp_gfx_load_microcode(rdev);
5064 	if (r)
5065 		return r;
5066 	r = cik_cp_compute_load_microcode(rdev);
5067 	if (r)
5068 		return r;
5069 
5070 	return 0;
5071 }
5072 
5073 static void cik_cp_fini(struct radeon_device *rdev)
5074 {
5075 	cik_cp_gfx_fini(rdev);
5076 	cik_cp_compute_fini(rdev);
5077 }
5078 
5079 static int cik_cp_resume(struct radeon_device *rdev)
5080 {
5081 	int r;
5082 
5083 	cik_enable_gui_idle_interrupt(rdev, false);
5084 
5085 	r = cik_cp_load_microcode(rdev);
5086 	if (r)
5087 		return r;
5088 
5089 	r = cik_cp_gfx_resume(rdev);
5090 	if (r)
5091 		return r;
5092 	r = cik_cp_compute_resume(rdev);
5093 	if (r)
5094 		return r;
5095 
5096 	cik_enable_gui_idle_interrupt(rdev, true);
5097 
5098 	return 0;
5099 }
5100 
5101 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5102 {
5103 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5104 		RREG32(GRBM_STATUS));
5105 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5106 		RREG32(GRBM_STATUS2));
5107 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5108 		RREG32(GRBM_STATUS_SE0));
5109 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5110 		RREG32(GRBM_STATUS_SE1));
5111 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5112 		RREG32(GRBM_STATUS_SE2));
5113 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5114 		RREG32(GRBM_STATUS_SE3));
5115 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5116 		RREG32(SRBM_STATUS));
5117 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5118 		RREG32(SRBM_STATUS2));
5119 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5120 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5121 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5122 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5123 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5124 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5125 		 RREG32(CP_STALLED_STAT1));
5126 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5127 		 RREG32(CP_STALLED_STAT2));
5128 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5129 		 RREG32(CP_STALLED_STAT3));
5130 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5131 		 RREG32(CP_CPF_BUSY_STAT));
5132 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5133 		 RREG32(CP_CPF_STALLED_STAT1));
5134 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5135 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5136 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5137 		 RREG32(CP_CPC_STALLED_STAT1));
5138 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5139 }
5140 
5141 /**
5142  * cik_gpu_check_soft_reset - check which blocks are busy
5143  *
5144  * @rdev: radeon_device pointer
5145  *
5146  * Check which blocks are busy and return the relevant reset
5147  * mask to be used by cik_gpu_soft_reset().
5148  * Returns a mask of the blocks to be reset.
5149  */
5150 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5151 {
5152 	u32 reset_mask = 0;
5153 	u32 tmp;
5154 
5155 	/* GRBM_STATUS */
5156 	tmp = RREG32(GRBM_STATUS);
5157 	if (tmp & (PA_BUSY | SC_BUSY |
5158 		   BCI_BUSY | SX_BUSY |
5159 		   TA_BUSY | VGT_BUSY |
5160 		   DB_BUSY | CB_BUSY |
5161 		   GDS_BUSY | SPI_BUSY |
5162 		   IA_BUSY | IA_BUSY_NO_DMA))
5163 		reset_mask |= RADEON_RESET_GFX;
5164 
5165 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5166 		reset_mask |= RADEON_RESET_CP;
5167 
5168 	/* GRBM_STATUS2 */
5169 	tmp = RREG32(GRBM_STATUS2);
5170 	if (tmp & RLC_BUSY)
5171 		reset_mask |= RADEON_RESET_RLC;
5172 
5173 	/* SDMA0_STATUS_REG */
5174 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5175 	if (!(tmp & SDMA_IDLE))
5176 		reset_mask |= RADEON_RESET_DMA;
5177 
5178 	/* SDMA1_STATUS_REG */
5179 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5180 	if (!(tmp & SDMA_IDLE))
5181 		reset_mask |= RADEON_RESET_DMA1;
5182 
5183 	/* SRBM_STATUS2 */
5184 	tmp = RREG32(SRBM_STATUS2);
5185 	if (tmp & SDMA_BUSY)
5186 		reset_mask |= RADEON_RESET_DMA;
5187 
5188 	if (tmp & SDMA1_BUSY)
5189 		reset_mask |= RADEON_RESET_DMA1;
5190 
5191 	/* SRBM_STATUS */
5192 	tmp = RREG32(SRBM_STATUS);
5193 
5194 	if (tmp & IH_BUSY)
5195 		reset_mask |= RADEON_RESET_IH;
5196 
5197 	if (tmp & SEM_BUSY)
5198 		reset_mask |= RADEON_RESET_SEM;
5199 
5200 	if (tmp & GRBM_RQ_PENDING)
5201 		reset_mask |= RADEON_RESET_GRBM;
5202 
5203 	if (tmp & VMC_BUSY)
5204 		reset_mask |= RADEON_RESET_VMC;
5205 
5206 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5207 		   MCC_BUSY | MCD_BUSY))
5208 		reset_mask |= RADEON_RESET_MC;
5209 
5210 	if (evergreen_is_display_hung(rdev))
5211 		reset_mask |= RADEON_RESET_DISPLAY;
5212 
5213 	/* Skip MC reset as it's most likely not hung, just busy */
5214 	if (reset_mask & RADEON_RESET_MC) {
5215 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5216 		reset_mask &= ~RADEON_RESET_MC;
5217 	}
5218 
5219 	return reset_mask;
5220 }
5221 
5222 /**
5223  * cik_gpu_soft_reset - soft reset GPU
5224  *
5225  * @rdev: radeon_device pointer
5226  * @reset_mask: mask of which blocks to reset
5227  *
5228  * Soft reset the blocks specified in @reset_mask.
5229  */
5230 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5231 {
5232 	struct evergreen_mc_save save;
5233 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5234 	u32 tmp;
5235 
5236 	if (reset_mask == 0)
5237 		return;
5238 
5239 	dev_info(rdev->dev, "GPU soft reset: 0x%08X\n", reset_mask);
5240 
5241 	cik_print_gpu_status_regs(rdev);
5242 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5243 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5244 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5245 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5246 
5247 	/* disable CG/PG */
5248 	cik_fini_pg(rdev);
5249 	cik_fini_cg(rdev);
5250 
5251 	/* stop the rlc */
5252 	cik_rlc_stop(rdev);
5253 
5254 	/* Disable GFX parsing/prefetching */
5255 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5256 
5257 	/* Disable MEC parsing/prefetching */
5258 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5259 
5260 	if (reset_mask & RADEON_RESET_DMA) {
5261 		/* sdma0 */
5262 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5263 		tmp |= SDMA_HALT;
5264 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5265 	}
5266 	if (reset_mask & RADEON_RESET_DMA1) {
5267 		/* sdma1 */
5268 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5269 		tmp |= SDMA_HALT;
5270 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5271 	}
5272 
5273 	evergreen_mc_stop(rdev, &save);
5274 	if (evergreen_mc_wait_for_idle(rdev)) {
5275 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5276 	}
5277 
5278 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5279 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5280 
5281 	if (reset_mask & RADEON_RESET_CP) {
5282 		grbm_soft_reset |= SOFT_RESET_CP;
5283 
5284 		srbm_soft_reset |= SOFT_RESET_GRBM;
5285 	}
5286 
5287 	if (reset_mask & RADEON_RESET_DMA)
5288 		srbm_soft_reset |= SOFT_RESET_SDMA;
5289 
5290 	if (reset_mask & RADEON_RESET_DMA1)
5291 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5292 
5293 	if (reset_mask & RADEON_RESET_DISPLAY)
5294 		srbm_soft_reset |= SOFT_RESET_DC;
5295 
5296 	if (reset_mask & RADEON_RESET_RLC)
5297 		grbm_soft_reset |= SOFT_RESET_RLC;
5298 
5299 	if (reset_mask & RADEON_RESET_SEM)
5300 		srbm_soft_reset |= SOFT_RESET_SEM;
5301 
5302 	if (reset_mask & RADEON_RESET_IH)
5303 		srbm_soft_reset |= SOFT_RESET_IH;
5304 
5305 	if (reset_mask & RADEON_RESET_GRBM)
5306 		srbm_soft_reset |= SOFT_RESET_GRBM;
5307 
5308 	if (reset_mask & RADEON_RESET_VMC)
5309 		srbm_soft_reset |= SOFT_RESET_VMC;
5310 
5311 	if (!(rdev->flags & RADEON_IS_IGP)) {
5312 		if (reset_mask & RADEON_RESET_MC)
5313 			srbm_soft_reset |= SOFT_RESET_MC;
5314 	}
5315 
5316 	if (grbm_soft_reset) {
5317 		tmp = RREG32(GRBM_SOFT_RESET);
5318 		tmp |= grbm_soft_reset;
5319 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5320 		WREG32(GRBM_SOFT_RESET, tmp);
5321 		tmp = RREG32(GRBM_SOFT_RESET);
5322 
5323 		udelay(50);
5324 
5325 		tmp &= ~grbm_soft_reset;
5326 		WREG32(GRBM_SOFT_RESET, tmp);
5327 		tmp = RREG32(GRBM_SOFT_RESET);
5328 	}
5329 
5330 	if (srbm_soft_reset) {
5331 		tmp = RREG32(SRBM_SOFT_RESET);
5332 		tmp |= srbm_soft_reset;
5333 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5334 		WREG32(SRBM_SOFT_RESET, tmp);
5335 		tmp = RREG32(SRBM_SOFT_RESET);
5336 
5337 		udelay(50);
5338 
5339 		tmp &= ~srbm_soft_reset;
5340 		WREG32(SRBM_SOFT_RESET, tmp);
5341 		tmp = RREG32(SRBM_SOFT_RESET);
5342 	}
5343 
5344 	/* Wait a little for things to settle down */
5345 	udelay(50);
5346 
5347 	evergreen_mc_resume(rdev, &save);
5348 	udelay(50);
5349 
5350 	cik_print_gpu_status_regs(rdev);
5351 }
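
/*
 * Editor's sketch (hypothetical helper, not in the driver): both reset
 * blocks above follow the same assert/deassert pattern.  The read-backs
 * after each write post the register access before and after the 50us
 * settle delay.
 */
static void cik_toggle_soft_reset(struct radeon_device *rdev,
				  u32 reg, u32 bits, const char *name)
{
	u32 tmp = RREG32(reg);

	tmp |= bits;
	dev_info(rdev->dev, "%s=0x%08X\n", name, tmp);
	WREG32(reg, tmp);
	tmp = RREG32(reg);	/* post the write */

	udelay(50);

	tmp &= ~bits;
	WREG32(reg, tmp);
	tmp = RREG32(reg);	/* post the write */
}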
5352 
5353 struct kv_reset_save_regs {
5354 	u32 gmcon_reng_execute;
5355 	u32 gmcon_misc;
5356 	u32 gmcon_misc3;
5357 };
5358 
5359 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5360 				   struct kv_reset_save_regs *save)
5361 {
5362 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5363 	save->gmcon_misc = RREG32(GMCON_MISC);
5364 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5365 
5366 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5367 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5368 						STCTRL_STUTTER_EN));
5369 }
5370 
5371 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5372 				      struct kv_reset_save_regs *save)
5373 {
5374 	int i;
5375 
5376 	WREG32(GMCON_PGFSM_WRITE, 0);
5377 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5378 
5379 	for (i = 0; i < 5; i++)
5380 		WREG32(GMCON_PGFSM_WRITE, 0);
5381 
5382 	WREG32(GMCON_PGFSM_WRITE, 0);
5383 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5384 
5385 	for (i = 0; i < 5; i++)
5386 		WREG32(GMCON_PGFSM_WRITE, 0);
5387 
5388 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5389 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5390 
5391 	for (i = 0; i < 5; i++)
5392 		WREG32(GMCON_PGFSM_WRITE, 0);
5393 
5394 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5395 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5396 
5397 	for (i = 0; i < 5; i++)
5398 		WREG32(GMCON_PGFSM_WRITE, 0);
5399 
5400 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5401 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5402 
5403 	for (i = 0; i < 5; i++)
5404 		WREG32(GMCON_PGFSM_WRITE, 0);
5405 
5406 	WREG32(GMCON_PGFSM_WRITE, 0);
5407 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5408 
5409 	for (i = 0; i < 5; i++)
5410 		WREG32(GMCON_PGFSM_WRITE, 0);
5411 
5412 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5413 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5414 
5415 	for (i = 0; i < 5; i++)
5416 		WREG32(GMCON_PGFSM_WRITE, 0);
5417 
5418 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5419 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5420 
5421 	for (i = 0; i < 5; i++)
5422 		WREG32(GMCON_PGFSM_WRITE, 0);
5423 
5424 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5425 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5426 
5427 	for (i = 0; i < 5; i++)
5428 		WREG32(GMCON_PGFSM_WRITE, 0);
5429 
5430 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5431 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5432 
5433 	for (i = 0; i < 5; i++)
5434 		WREG32(GMCON_PGFSM_WRITE, 0);
5435 
5436 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5437 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5438 
5439 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5440 	WREG32(GMCON_MISC, save->gmcon_misc);
5441 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5442 }
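
/*
 * Editor's sketch (hypothetical, not driver code): the PGFSM restore
 * above is a fixed series of (GMCON_PGFSM_WRITE, GMCON_PGFSM_CONFIG)
 * pairs, each but the last followed by five dummy writes.  A
 * table-driven equivalent would look like this (the GMCON_MISC and
 * GMCON_RENG_EXECUTE restores would still follow):
 */
static void kv_pgfsm_restore_sketch(struct radeon_device *rdev)
{
	static const struct { u32 write, config; } seq[] = {
		{ 0x000000, 0x200010ff }, { 0x000000, 0x300010ff },
		{ 0x210000, 0xa00010ff }, { 0x021003, 0xb00010ff },
		{ 0x002b00, 0xc00010ff }, { 0x000000, 0xd00010ff },
		{ 0x420000, 0x100010ff }, { 0x120202, 0x500010ff },
		{ 0x3e3e36, 0x600010ff }, { 0x373f3e, 0x700010ff },
		{ 0x3e1332, 0xe00010ff },
	};
	int i, j;

	for (i = 0; i < ARRAY_SIZE(seq); i++) {
		WREG32(GMCON_PGFSM_WRITE, seq[i].write);
		WREG32(GMCON_PGFSM_CONFIG, seq[i].config);
		if (i < ARRAY_SIZE(seq) - 1)
			for (j = 0; j < 5; j++)
				WREG32(GMCON_PGFSM_WRITE, 0);
	}
}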
5443 
5444 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5445 {
5446 	struct evergreen_mc_save save;
5447 	struct kv_reset_save_regs kv_save = { 0 };
5448 	u32 tmp, i;
5449 
5450 	dev_info(rdev->dev, "GPU pci config reset\n");
5451 
5452 	/* disable dpm? */
5453 
5454 	/* disable cg/pg */
5455 	cik_fini_pg(rdev);
5456 	cik_fini_cg(rdev);
5457 
5458 	/* Disable GFX parsing/prefetching */
5459 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5460 
5461 	/* Disable MEC parsing/prefetching */
5462 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5463 
5464 	/* sdma0 */
5465 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5466 	tmp |= SDMA_HALT;
5467 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5468 	/* sdma1 */
5469 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5470 	tmp |= SDMA_HALT;
5471 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5472 	/* XXX other engines? */
5473 
5474 	/* halt the rlc, disable cp internal ints */
5475 	cik_rlc_stop(rdev);
5476 
5477 	udelay(50);
5478 
5479 	/* disable mem access */
5480 	evergreen_mc_stop(rdev, &save);
5481 	if (evergreen_mc_wait_for_idle(rdev)) {
5482 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5483 	}
5484 
5485 	if (rdev->flags & RADEON_IS_IGP)
5486 		kv_save_regs_for_reset(rdev, &kv_save);
5487 
5488 	/* disable BM */
5489 	pci_clear_master(rdev->pdev);
5490 	/* reset */
5491 	radeon_pci_config_reset(rdev);
5492 
5493 	udelay(100);
5494 
5495 	/* wait for asic to come out of reset */
5496 	for (i = 0; i < rdev->usec_timeout; i++) {
5497 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5498 			break;
5499 		udelay(1);
5500 	}
5501 
5502 	/* does asic init need to be run first??? */
5503 	if (rdev->flags & RADEON_IS_IGP)
5504 		kv_restore_regs_for_reset(rdev, &kv_save);
5505 }
5506 
5507 /**
5508  * cik_asic_reset - soft reset GPU
5509  *
5510  * @rdev: radeon_device pointer
5511  *
5512  * Look up which blocks are hung and attempt
5513  * to reset them.
5514  * Returns 0 for success.
5515  */
5516 int cik_asic_reset(struct radeon_device *rdev)
5517 {
5518 	u32 reset_mask;
5519 
5520 	reset_mask = cik_gpu_check_soft_reset(rdev);
5521 
5522 	if (reset_mask)
5523 		r600_set_bios_scratch_engine_hung(rdev, true);
5524 
5525 	/* try soft reset */
5526 	cik_gpu_soft_reset(rdev, reset_mask);
5527 
5528 	reset_mask = cik_gpu_check_soft_reset(rdev);
5529 
5530 	/* try pci config reset */
5531 	if (reset_mask && radeon_hard_reset)
5532 		cik_gpu_pci_config_reset(rdev);
5533 
5534 	reset_mask = cik_gpu_check_soft_reset(rdev);
5535 
5536 	if (!reset_mask)
5537 		r600_set_bios_scratch_engine_hung(rdev, false);
5538 
5539 	return 0;
5540 }
5541 
5542 /**
5543  * cik_gfx_is_lockup - check if the 3D engine is locked up
5544  *
5545  * @rdev: radeon_device pointer
5546  * @ring: radeon_ring structure holding ring information
5547  *
5548  * Check if the 3D engine is locked up (CIK).
5549  * Returns true if the engine is locked, false if not.
5550  */
5551 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5552 {
5553 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5554 
5555 	if (!(reset_mask & (RADEON_RESET_GFX |
5556 			    RADEON_RESET_COMPUTE |
5557 			    RADEON_RESET_CP))) {
5558 		radeon_ring_lockup_update(rdev, ring);
5559 		return false;
5560 	}
5561 	return radeon_ring_test_lockup(rdev, ring);
5562 }
5563 
5564 /* MC */
5565 /**
5566  * cik_mc_program - program the GPU memory controller
5567  *
5568  * @rdev: radeon_device pointer
5569  *
5570  * Set the location of vram, gart, and AGP in the GPU's
5571  * physical address space (CIK).
5572  */
5573 static void cik_mc_program(struct radeon_device *rdev)
5574 {
5575 	struct evergreen_mc_save save;
5576 	u32 tmp;
5577 	int i, j;
5578 
5579 	/* Initialize HDP */
5580 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5581 		WREG32((0x2c14 + j), 0x00000000);
5582 		WREG32((0x2c18 + j), 0x00000000);
5583 		WREG32((0x2c1c + j), 0x00000000);
5584 		WREG32((0x2c20 + j), 0x00000000);
5585 		WREG32((0x2c24 + j), 0x00000000);
5586 	}
5587 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5588 
5589 	evergreen_mc_stop(rdev, &save);
5590 	if (radeon_mc_wait_for_idle(rdev)) {
5591 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5592 	}
5593 	/* Lockout access through VGA aperture*/
5594 	/* Lock out access through the VGA aperture */
5595 	/* Update configuration */
5596 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5597 	       rdev->mc.vram_start >> 12);
5598 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5599 	       rdev->mc.vram_end >> 12);
5600 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5601 	       rdev->vram_scratch.gpu_addr >> 12);
5602 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5603 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5604 	WREG32(MC_VM_FB_LOCATION, tmp);
5605 	/* XXX double check these! */
5606 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5607 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5608 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5609 	WREG32(MC_VM_AGP_BASE, 0);
5610 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5611 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5612 	if (radeon_mc_wait_for_idle(rdev)) {
5613 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5614 	}
5615 	evergreen_mc_resume(rdev, &save);
5616 	/* we need to own VRAM, so turn off the VGA renderer here
5617 	 * to stop it from overwriting our objects */
5618 	rv515_vga_render_disable(rdev);
5619 }
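
/*
 * Editor's sketch (hypothetical helper): MC_VM_FB_LOCATION, as computed
 * above, packs the VRAM range in units of 16MB (address >> 24): TOP in
 * bits 16..31 and BASE in bits 0..15.
 */
static inline u32 cik_fb_location(u64 vram_start, u64 vram_end)
{
	return (u32)((((vram_end >> 24) & 0xFFFF) << 16) |
		     ((vram_start >> 24) & 0xFFFF));
}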
5620 
5621 /**
5622  * cik_mc_init - initialize the memory controller driver params
5623  *
5624  * @rdev: radeon_device pointer
5625  *
5626  * Look up the amount of vram, vram width, and decide how to place
5627  * vram and gart within the GPU's physical address space (CIK).
5628  * Returns 0 for success.
5629  */
5630 static int cik_mc_init(struct radeon_device *rdev)
5631 {
5632 	u32 tmp;
5633 	int chansize, numchan;
5634 
5635 	/* Get VRAM information */
5636 	rdev->mc.vram_is_ddr = true;
5637 	tmp = RREG32(MC_ARB_RAMCFG);
5638 	if (tmp & CHANSIZE_MASK) {
5639 		chansize = 64;
5640 	} else {
5641 		chansize = 32;
5642 	}
5643 	tmp = RREG32(MC_SHARED_CHMAP);
5644 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5645 	case 0:
5646 	default:
5647 		numchan = 1;
5648 		break;
5649 	case 1:
5650 		numchan = 2;
5651 		break;
5652 	case 2:
5653 		numchan = 4;
5654 		break;
5655 	case 3:
5656 		numchan = 8;
5657 		break;
5658 	case 4:
5659 		numchan = 3;
5660 		break;
5661 	case 5:
5662 		numchan = 6;
5663 		break;
5664 	case 6:
5665 		numchan = 10;
5666 		break;
5667 	case 7:
5668 		numchan = 12;
5669 		break;
5670 	case 8:
5671 		numchan = 16;
5672 		break;
5673 	}
5674 	rdev->mc.vram_width = numchan * chansize;
5675 	/* Could aper size report 0? */
5676 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5677 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5678 	/* size in MB on CIK */
5679 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5680 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5681 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5682 	si_vram_gtt_location(rdev, &rdev->mc);
5683 	radeon_update_bandwidth_info(rdev);
5684 
5685 	return 0;
5686 }
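
/*
 * Editor's note (illustrative, not driver code): the NOOFCHAN switch in
 * cik_mc_init() is just a lookup from the 4-bit register field to the
 * channel count; values past the end of the table fall back to 1,
 * matching the default case.
 *
 *   numchan = (v < ARRAY_SIZE(cik_numchan_map)) ? cik_numchan_map[v] : 1;
 */
static const u8 cik_numchan_map[] = { 1, 2, 4, 8, 3, 6, 10, 12, 16 };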
5687 
5688 /*
5689  * GART
5690  * VMID 0 is the physical GPU addresses as used by the kernel.
5691  * VMIDs 1-15 are used for userspace clients and are handled
5692  * by the radeon vm/hsa code.
5693  */
5694 /**
5695  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5696  *
5697  * @rdev: radeon_device pointer
5698  *
5699  * Flush the TLB for the VMID 0 page table (CIK).
5700  */
5701 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5702 {
5703 	/* flush hdp cache */
5704 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5705 
5706 	/* bits 0-15 are the VM contexts0-15 */
5707 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5708 }
5709 
5710 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5711 {
5712 	int i;
5713 	uint32_t sh_mem_bases, sh_mem_config;
5714 
5715 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5716 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5717 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5718 
5719 	mutex_lock(&rdev->srbm_mutex);
5720 	for (i = 8; i < 16; i++) {
5721 		cik_srbm_select(rdev, 0, 0, 0, i);
5722 		/* CP and shaders */
5723 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5724 		WREG32(SH_MEM_APE1_BASE, 1);
5725 		WREG32(SH_MEM_APE1_LIMIT, 0);
5726 		WREG32(SH_MEM_BASES, sh_mem_bases);
5727 	}
5728 	cik_srbm_select(rdev, 0, 0, 0, 0);
5729 	mutex_unlock(&rdev->srbm_mutex);
5730 }
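
/*
 * Editor's sketch (hypothetical helper): SH_MEM_BASES, as the
 * 0x6000 | 0x6000 << 16 packing above implies, holds two 16-bit
 * aperture bases -- the private base in the low half and the shared
 * base in the high half.
 */
static inline u32 cik_sh_mem_bases(u16 private_base, u16 shared_base)
{
	return (u32)private_base | ((u32)shared_base << 16);
}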
5731 
5732 /**
5733  * cik_pcie_gart_enable - gart enable
5734  *
5735  * @rdev: radeon_device pointer
5736  *
5737  * This sets up the TLBs, programs the page tables for VMID0,
5738  * sets up the hw for VMIDs 1-15 which are allocated on
5739  * demand, and sets up the global locations for the LDS, GDS,
5740  * and GPUVM for FSA64 clients (CIK).
5741  * Returns 0 for success, errors for failure.
5742  */
5743 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5744 {
5745 	int r, i;
5746 
5747 	if (rdev->gart.robj == NULL) {
5748 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5749 		return -EINVAL;
5750 	}
5751 	r = radeon_gart_table_vram_pin(rdev);
5752 	if (r)
5753 		return r;
5754 	/* Setup TLB control */
5755 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5756 	       (0xA << 7) |
5757 	       ENABLE_L1_TLB |
5758 	       ENABLE_L1_FRAGMENT_PROCESSING |
5759 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5760 	       ENABLE_ADVANCED_DRIVER_MODEL |
5761 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5762 	/* Setup L2 cache */
5763 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5764 	       ENABLE_L2_FRAGMENT_PROCESSING |
5765 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5766 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5767 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5768 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5769 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5770 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5771 	       BANK_SELECT(4) |
5772 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5773 	/* setup context0 */
5774 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5775 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5776 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5777 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5778 			(u32)(rdev->dummy_page.addr >> 12));
5779 	WREG32(VM_CONTEXT0_CNTL2, 0);
5780 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5781 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5782 
5783 	WREG32(0x15D4, 0);
5784 	WREG32(0x15D8, 0);
5785 	WREG32(0x15DC, 0);
5786 
5787 	/* restore context1-15 */
5788 	/* set vm size, must be a multiple of 4 */
5789 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5790 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5791 	for (i = 1; i < 16; i++) {
5792 		if (i < 8)
5793 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5794 			       rdev->vm_manager.saved_table_addr[i]);
5795 		else
5796 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5797 			       rdev->vm_manager.saved_table_addr[i]);
5798 	}
5799 
5800 	/* enable context1-15 */
5801 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5802 	       (u32)(rdev->dummy_page.addr >> 12));
5803 	WREG32(VM_CONTEXT1_CNTL2, 4);
5804 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5805 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5806 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5807 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5808 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5809 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5810 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5811 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5812 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5813 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5814 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5815 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5816 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5817 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5818 
5819 	if (rdev->family == CHIP_KAVERI) {
5820 		u32 tmp = RREG32(CHUB_CONTROL);
5821 		tmp &= ~BYPASS_VM;
5822 		WREG32(CHUB_CONTROL, tmp);
5823 	}
5824 
5825 	/* XXX SH_MEM regs */
5826 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5827 	mutex_lock(&rdev->srbm_mutex);
5828 	for (i = 0; i < 16; i++) {
5829 		cik_srbm_select(rdev, 0, 0, 0, i);
5830 		/* CP and shaders */
5831 		WREG32(SH_MEM_CONFIG, 0);
5832 		WREG32(SH_MEM_APE1_BASE, 1);
5833 		WREG32(SH_MEM_APE1_LIMIT, 0);
5834 		WREG32(SH_MEM_BASES, 0);
5835 		/* SDMA GFX */
5836 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5837 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5838 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5839 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5840 		/* XXX SDMA RLC - todo */
5841 	}
5842 	cik_srbm_select(rdev, 0, 0, 0, 0);
5843 	mutex_unlock(&rdev->srbm_mutex);
5844 
5845 	cik_pcie_init_compute_vmid(rdev);
5846 
5847 	cik_pcie_gart_tlb_flush(rdev);
5848 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5849 		 (unsigned)(rdev->mc.gtt_size >> 20),
5850 		 (unsigned long long)rdev->gart.table_addr);
5851 	rdev->gart.ready = true;
5852 	return 0;
5853 }
5854 
5855 /**
5856  * cik_pcie_gart_disable - gart disable
5857  *
5858  * @rdev: radeon_device pointer
5859  *
5860  * This disables all VM page tables (CIK).
5861  */
5862 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5863 {
5864 	unsigned i;
5865 
5866 	for (i = 1; i < 16; ++i) {
5867 		uint32_t reg;
5868 		if (i < 8)
5869 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5870 		else
5871 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5872 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5873 	}
5874 
5875 	/* Disable all tables */
5876 	WREG32(VM_CONTEXT0_CNTL, 0);
5877 	WREG32(VM_CONTEXT1_CNTL, 0);
5878 	/* Setup TLB control */
5879 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5880 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5881 	/* Setup L2 cache */
5882 	WREG32(VM_L2_CNTL,
5883 	       ENABLE_L2_FRAGMENT_PROCESSING |
5884 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5885 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5886 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5887 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5888 	WREG32(VM_L2_CNTL2, 0);
5889 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5890 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5891 	radeon_gart_table_vram_unpin(rdev);
5892 }
5893 
5894 /**
5895  * cik_pcie_gart_fini - vm fini callback
5896  *
5897  * @rdev: radeon_device pointer
5898  *
5899  * Tears down the driver GART/VM setup (CIK).
5900  */
5901 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5902 {
5903 	cik_pcie_gart_disable(rdev);
5904 	radeon_gart_table_vram_free(rdev);
5905 	radeon_gart_fini(rdev);
5906 }
5907 
5908 /* vm parser */
5909 /**
5910  * cik_ib_parse - vm ib_parse callback
5911  *
5912  * @rdev: radeon_device pointer
5913  * @ib: indirect buffer pointer
5914  *
5915  * CIK uses hw IB checking so this is a nop (CIK).
5916  */
5917 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5918 {
5919 	return 0;
5920 }
5921 
5922 /*
5923  * vm
5924  * VMID 0 is the physical GPU addresses as used by the kernel.
5925  * VMIDs 1-15 are used for userspace clients and are handled
5926  * by the radeon vm/hsa code.
5927  */
5928 /**
5929  * cik_vm_init - cik vm init callback
5930  *
5931  * @rdev: radeon_device pointer
5932  *
5933  * Inits cik specific vm parameters (number of VMs, base of vram for
5934  * VMIDs 1-15) (CIK).
5935  * Returns 0 for success.
5936  */
5937 int cik_vm_init(struct radeon_device *rdev)
5938 {
5939 	/*
5940 	 * number of VMs
5941 	 * VMID 0 is reserved for System
5942 	 * radeon graphics/compute will use VMIDs 1-7
5943 	 * amdkfd will use VMIDs 8-15
5944 	 */
5945 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5946 	/* base offset of vram pages */
5947 	if (rdev->flags & RADEON_IS_IGP) {
5948 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5949 		tmp <<= 22;
5950 		rdev->vm_manager.vram_base_offset = tmp;
5951 	} else
5952 		rdev->vm_manager.vram_base_offset = 0;
5953 
5954 	return 0;
5955 }
5956 
5957 /**
5958  * cik_vm_fini - cik vm fini callback
5959  *
5960  * @rdev: radeon_device pointer
5961  *
5962  * Tear down any asic specific VM setup (CIK).
5963  */
5964 void cik_vm_fini(struct radeon_device *rdev)
5965 {
5966 }
5967 
5968 /**
5969  * cik_vm_decode_fault - print human readable fault info
5970  *
5971  * @rdev: radeon_device pointer
5972  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5973  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5974  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5975  * Print human readable fault information (CIK).
5976  */
5977 static void cik_vm_decode_fault(struct radeon_device *rdev,
5978 				u32 status, u32 addr, u32 mc_client)
5979 {
5980 	u32 mc_id;
5981 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5982 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5983 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5984 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5985 
5986 	if (rdev->family == CHIP_HAWAII)
5987 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5988 	else
5989 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5990 
5991 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5992 	       protections, vmid, addr,
5993 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5994 	       block, mc_client, mc_id);
5995 }
5996 
5997 /**
5998  * cik_vm_flush - cik vm flush using the CP
5999  *
6000  * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID of the page table to flush
 * @pd_addr: GPU address of the page directory
6001  *
6002  * Update the page table base and flush the VM TLB
6003  * using the CP (CIK).
6004  */
6005 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6006 		  unsigned vm_id, uint64_t pd_addr)
6007 {
6008 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6009 
6010 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6011 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6012 				 WRITE_DATA_DST_SEL(0)));
6013 	if (vm_id < 8) {
6014 		radeon_ring_write(ring,
6015 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6016 	} else {
6017 		radeon_ring_write(ring,
6018 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6019 	}
6020 	radeon_ring_write(ring, 0);
6021 	radeon_ring_write(ring, pd_addr >> 12);
6022 
6023 	/* update SH_MEM_* regs */
6024 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6025 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6026 				 WRITE_DATA_DST_SEL(0)));
6027 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6028 	radeon_ring_write(ring, 0);
6029 	radeon_ring_write(ring, VMID(vm_id));
6030 
6031 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6032 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6033 				 WRITE_DATA_DST_SEL(0)));
6034 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6035 	radeon_ring_write(ring, 0);
6036 
6037 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6038 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6039 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6040 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6041 
6042 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6043 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6044 				 WRITE_DATA_DST_SEL(0)));
6045 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6046 	radeon_ring_write(ring, 0);
6047 	radeon_ring_write(ring, VMID(0));
6048 
6049 	/* HDP flush */
6050 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6051 
6052 	/* bits 0-15 are the VM contexts0-15 */
6053 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6054 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6055 				 WRITE_DATA_DST_SEL(0)));
6056 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6057 	radeon_ring_write(ring, 0);
6058 	radeon_ring_write(ring, 1 << vm_id);
6059 
6060 	/* wait for the invalidate to complete */
6061 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6062 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6063 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6064 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6065 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6066 	radeon_ring_write(ring, 0);
6067 	radeon_ring_write(ring, 0); /* ref */
6068 	radeon_ring_write(ring, 0); /* mask */
6069 	radeon_ring_write(ring, 0x20); /* poll interval */
6070 
6071 	/* compute doesn't have PFP */
6072 	if (usepfp) {
6073 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6074 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6075 		radeon_ring_write(ring, 0x0);
6076 	}
6077 }
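
/*
 * Editor's note: each PACKET3_WRITE_DATA emission in cik_vm_flush() is
 *   DW0: PACKET3(PACKET3_WRITE_DATA, n)
 *   DW1: engine (PFP or ME) and destination select
 *   DW2: destination register dword offset (reg >> 2)
 *   DW3: upper destination address (0 for register writes)
 *   DW4+: n - 2 data dwords
 * which accounts for the five (or, for the SH_MEM_* block, eight)
 * radeon_ring_write() calls per register update above.
 */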
6078 
6079 /*
6080  * RLC
6081  * The RLC is a multi-purpose microengine that handles a
6082  * variety of functions, the most important of which is
6083  * the interrupt controller.
6084  */
6085 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6086 					  bool enable)
6087 {
6088 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6089 
6090 	if (enable)
6091 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6092 	else
6093 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6094 	WREG32(CP_INT_CNTL_RING0, tmp);
6095 }
6096 
6097 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6098 {
6099 	u32 tmp;
6100 
6101 	tmp = RREG32(RLC_LB_CNTL);
6102 	if (enable)
6103 		tmp |= LOAD_BALANCE_ENABLE;
6104 	else
6105 		tmp &= ~LOAD_BALANCE_ENABLE;
6106 	WREG32(RLC_LB_CNTL, tmp);
6107 }
6108 
6109 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6110 {
6111 	u32 i, j, k;
6112 	u32 mask;
6113 
6114 	mutex_lock(&rdev->grbm_idx_mutex);
6115 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6116 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6117 			cik_select_se_sh(rdev, i, j);
6118 			for (k = 0; k < rdev->usec_timeout; k++) {
6119 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6120 					break;
6121 				udelay(1);
6122 			}
6123 		}
6124 	}
6125 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6126 	mutex_unlock(&rdev->grbm_idx_mutex);
6127 
6128 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6129 	for (k = 0; k < rdev->usec_timeout; k++) {
6130 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6131 			break;
6132 		udelay(1);
6133 	}
6134 }
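
/*
 * Editor's sketch (hypothetical helper): the bounded busy-wait used
 * above -- and throughout this file -- polls a register until the
 * masked value matches, giving up after rdev->usec_timeout
 * microseconds.
 */
static bool cik_poll_reg(struct radeon_device *rdev, u32 reg,
			 u32 mask, u32 wait_for)
{
	u32 k;

	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(reg) & mask) == wait_for)
			return true;
		udelay(1);
	}
	return false;	/* timed out */
}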
6135 
6136 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6137 {
6138 	u32 tmp;
6139 
6140 	tmp = RREG32(RLC_CNTL);
6141 	if (tmp != rlc)
6142 		WREG32(RLC_CNTL, rlc);
6143 }
6144 
6145 static u32 cik_halt_rlc(struct radeon_device *rdev)
6146 {
6147 	u32 data, orig;
6148 
6149 	orig = data = RREG32(RLC_CNTL);
6150 
6151 	if (data & RLC_ENABLE) {
6152 		u32 i;
6153 
6154 		data &= ~RLC_ENABLE;
6155 		WREG32(RLC_CNTL, data);
6156 
6157 		for (i = 0; i < rdev->usec_timeout; i++) {
6158 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6159 				break;
6160 			udelay(1);
6161 		}
6162 
6163 		cik_wait_for_rlc_serdes(rdev);
6164 	}
6165 
6166 	return orig;
6167 }
6168 
6169 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6170 {
6171 	u32 tmp, i, mask;
6172 
6173 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6174 	WREG32(RLC_GPR_REG2, tmp);
6175 
6176 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6177 	for (i = 0; i < rdev->usec_timeout; i++) {
6178 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6179 			break;
6180 		udelay(1);
6181 	}
6182 
6183 	for (i = 0; i < rdev->usec_timeout; i++) {
6184 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6185 			break;
6186 		udelay(1);
6187 	}
6188 }
6189 
6190 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6191 {
6192 	u32 tmp;
6193 
6194 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6195 	WREG32(RLC_GPR_REG2, tmp);
6196 }
6197 
6198 /**
6199  * cik_rlc_stop - stop the RLC ME
6200  *
6201  * @rdev: radeon_device pointer
6202  *
6203  * Halt the RLC ME (MicroEngine) (CIK).
6204  */
6205 static void cik_rlc_stop(struct radeon_device *rdev)
6206 {
6207 	WREG32(RLC_CNTL, 0);
6208 
6209 	cik_enable_gui_idle_interrupt(rdev, false);
6210 
6211 	cik_wait_for_rlc_serdes(rdev);
6212 }
6213 
6214 /**
6215  * cik_rlc_start - start the RLC ME
6216  *
6217  * @rdev: radeon_device pointer
6218  *
6219  * Unhalt the RLC ME (MicroEngine) (CIK).
6220  */
6221 static void cik_rlc_start(struct radeon_device *rdev)
6222 {
6223 	WREG32(RLC_CNTL, RLC_ENABLE);
6224 
6225 	cik_enable_gui_idle_interrupt(rdev, true);
6226 
6227 	udelay(50);
6228 }
6229 
6230 /**
6231  * cik_rlc_resume - setup the RLC hw
6232  *
6233  * @rdev: radeon_device pointer
6234  *
6235  * Initialize the RLC registers, load the ucode,
6236  * and start the RLC (CIK).
6237  * Returns 0 for success, -EINVAL if the ucode is not available.
6238  */
6239 static int cik_rlc_resume(struct radeon_device *rdev)
6240 {
6241 	u32 i, size, tmp;
6242 
6243 	if (!rdev->rlc_fw)
6244 		return -EINVAL;
6245 
6246 	cik_rlc_stop(rdev);
6247 
6248 	/* disable CG */
6249 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6250 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6251 
6252 	si_rlc_reset(rdev);
6253 
6254 	cik_init_pg(rdev);
6255 
6256 	cik_init_cg(rdev);
6257 
6258 	WREG32(RLC_LB_CNTR_INIT, 0);
6259 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6260 
6261 	mutex_lock(&rdev->grbm_idx_mutex);
6262 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6263 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6264 	WREG32(RLC_LB_PARAMS, 0x00600408);
6265 	WREG32(RLC_LB_CNTL, 0x80000004);
6266 	mutex_unlock(&rdev->grbm_idx_mutex);
6267 
6268 	WREG32(RLC_MC_CNTL, 0);
6269 	WREG32(RLC_UCODE_CNTL, 0);
6270 
6271 	if (rdev->new_fw) {
6272 		const struct rlc_firmware_header_v1_0 *hdr =
6273 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6274 		const __le32 *fw_data = (const __le32 *)
6275 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6276 
6277 		radeon_ucode_print_rlc_hdr(&hdr->header);
6278 
6279 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6280 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6281 		for (i = 0; i < size; i++)
6282 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6283 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6284 	} else {
6285 		const __be32 *fw_data;
6286 
6287 		switch (rdev->family) {
6288 		case CHIP_BONAIRE:
6289 		case CHIP_HAWAII:
6290 		default:
6291 			size = BONAIRE_RLC_UCODE_SIZE;
6292 			break;
6293 		case CHIP_KAVERI:
6294 			size = KV_RLC_UCODE_SIZE;
6295 			break;
6296 		case CHIP_KABINI:
6297 			size = KB_RLC_UCODE_SIZE;
6298 			break;
6299 		case CHIP_MULLINS:
6300 			size = ML_RLC_UCODE_SIZE;
6301 			break;
6302 		}
6303 
6304 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6305 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6306 		for (i = 0; i < size; i++)
6307 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6308 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6309 	}
6310 
6311 	/* XXX - find out what chips support lbpw */
6312 	cik_enable_lbpw(rdev, false);
6313 
6314 	if (rdev->family == CHIP_BONAIRE)
6315 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6316 
6317 	cik_rlc_start(rdev);
6318 
6319 	return 0;
6320 }
6321 
6322 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6323 {
6324 	u32 data, orig, tmp, tmp2;
6325 
6326 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6327 
6328 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6329 		cik_enable_gui_idle_interrupt(rdev, true);
6330 
6331 		tmp = cik_halt_rlc(rdev);
6332 
6333 		mutex_lock(&rdev->grbm_idx_mutex);
6334 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6335 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6336 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6337 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6338 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6339 		mutex_unlock(&rdev->grbm_idx_mutex);
6340 
6341 		cik_update_rlc(rdev, tmp);
6342 
6343 		data |= CGCG_EN | CGLS_EN;
6344 	} else {
6345 		cik_enable_gui_idle_interrupt(rdev, false);
6346 
6347 		RREG32(CB_CGTT_SCLK_CTRL);
6348 		RREG32(CB_CGTT_SCLK_CTRL);
6349 		RREG32(CB_CGTT_SCLK_CTRL);
6350 		RREG32(CB_CGTT_SCLK_CTRL);
6351 
6352 		data &= ~(CGCG_EN | CGLS_EN);
6353 	}
6354 
6355 	if (orig != data)
6356 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6357 
6358 }
6359 
6360 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6361 {
6362 	u32 data, orig, tmp = 0;
6363 
6364 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6365 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6366 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6367 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6368 				data |= CP_MEM_LS_EN;
6369 				if (orig != data)
6370 					WREG32(CP_MEM_SLP_CNTL, data);
6371 			}
6372 		}
6373 
6374 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6375 		data |= 0x00000001;
6376 		data &= 0xfffffffd;
6377 		if (orig != data)
6378 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6379 
6380 		tmp = cik_halt_rlc(rdev);
6381 
6382 		mutex_lock(&rdev->grbm_idx_mutex);
6383 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6384 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6385 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6386 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6387 		WREG32(RLC_SERDES_WR_CTRL, data);
6388 		mutex_unlock(&rdev->grbm_idx_mutex);
6389 
6390 		cik_update_rlc(rdev, tmp);
6391 
6392 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6393 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6394 			data &= ~SM_MODE_MASK;
6395 			data |= SM_MODE(0x2);
6396 			data |= SM_MODE_ENABLE;
6397 			data &= ~CGTS_OVERRIDE;
6398 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6399 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6400 				data &= ~CGTS_LS_OVERRIDE;
6401 			data &= ~ON_MONITOR_ADD_MASK;
6402 			data |= ON_MONITOR_ADD_EN;
6403 			data |= ON_MONITOR_ADD(0x96);
6404 			if (orig != data)
6405 				WREG32(CGTS_SM_CTRL_REG, data);
6406 		}
6407 	} else {
6408 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6409 		data |= 0x00000003;
6410 		if (orig != data)
6411 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6412 
6413 		data = RREG32(RLC_MEM_SLP_CNTL);
6414 		if (data & RLC_MEM_LS_EN) {
6415 			data &= ~RLC_MEM_LS_EN;
6416 			WREG32(RLC_MEM_SLP_CNTL, data);
6417 		}
6418 
6419 		data = RREG32(CP_MEM_SLP_CNTL);
6420 		if (data & CP_MEM_LS_EN) {
6421 			data &= ~CP_MEM_LS_EN;
6422 			WREG32(CP_MEM_SLP_CNTL, data);
6423 		}
6424 
6425 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6426 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6427 		if (orig != data)
6428 			WREG32(CGTS_SM_CTRL_REG, data);
6429 
6430 		tmp = cik_halt_rlc(rdev);
6431 
6432 		mutex_lock(&rdev->grbm_idx_mutex);
6433 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6434 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6435 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6436 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6437 		WREG32(RLC_SERDES_WR_CTRL, data);
6438 		mutex_unlock(&rdev->grbm_idx_mutex);
6439 
6440 		cik_update_rlc(rdev, tmp);
6441 	}
6442 }
6443 
6444 static const u32 mc_cg_registers[] =
6445 {
6446 	MC_HUB_MISC_HUB_CG,
6447 	MC_HUB_MISC_SIP_CG,
6448 	MC_HUB_MISC_VM_CG,
6449 	MC_XPB_CLK_GAT,
6450 	ATC_MISC_CG,
6451 	MC_CITF_MISC_WR_CG,
6452 	MC_CITF_MISC_RD_CG,
6453 	MC_CITF_MISC_VM_CG,
6454 	VM_L2_CG,
6455 };
6456 
6457 static void cik_enable_mc_ls(struct radeon_device *rdev,
6458 			     bool enable)
6459 {
6460 	int i;
6461 	u32 orig, data;
6462 
6463 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6464 		orig = data = RREG32(mc_cg_registers[i]);
6465 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6466 			data |= MC_LS_ENABLE;
6467 		else
6468 			data &= ~MC_LS_ENABLE;
6469 		if (data != orig)
6470 			WREG32(mc_cg_registers[i], data);
6471 	}
6472 }
6473 
6474 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6475 			       bool enable)
6476 {
6477 	int i;
6478 	u32 orig, data;
6479 
6480 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6481 		orig = data = RREG32(mc_cg_registers[i]);
6482 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6483 			data |= MC_CG_ENABLE;
6484 		else
6485 			data &= ~MC_CG_ENABLE;
6486 		if (data != orig)
6487 			WREG32(mc_cg_registers[i], data);
6488 	}
6489 }
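
/*
 * Editor's sketch (hypothetical helper): the clockgating enables above
 * and below all share one read-modify-write idiom -- read the register,
 * set or clear the gating bits, and write back only on change to avoid
 * redundant register traffic.
 */
static void cik_update_reg_bits(struct radeon_device *rdev, u32 reg,
				u32 set_bits, u32 clear_bits)
{
	u32 orig, data;

	orig = data = RREG32(reg);
	data |= set_bits;
	data &= ~clear_bits;
	if (data != orig)
		WREG32(reg, data);
}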
6490 
6491 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6492 				 bool enable)
6493 {
6494 	u32 orig, data;
6495 
6496 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6497 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6498 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6499 	} else {
6500 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6501 		data |= 0xff000000;
6502 		if (data != orig)
6503 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6504 
6505 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6506 		data |= 0xff000000;
6507 		if (data != orig)
6508 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6509 	}
6510 }
6511 
6512 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6513 				 bool enable)
6514 {
6515 	u32 orig, data;
6516 
6517 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6518 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6519 		data |= 0x100;
6520 		if (orig != data)
6521 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6522 
6523 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6524 		data |= 0x100;
6525 		if (orig != data)
6526 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6527 	} else {
6528 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6529 		data &= ~0x100;
6530 		if (orig != data)
6531 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6532 
6533 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6534 		data &= ~0x100;
6535 		if (orig != data)
6536 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6537 	}
6538 }
6539 
6540 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6541 				bool enable)
6542 {
6543 	u32 orig, data;
6544 
6545 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6546 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6547 		data = 0xfff;	/* read-back discarded; unconditionally set the low 12 bits */
6548 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6549 
6550 		orig = data = RREG32(UVD_CGC_CTRL);
6551 		data |= DCM;
6552 		if (orig != data)
6553 			WREG32(UVD_CGC_CTRL, data);
6554 	} else {
6555 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6556 		data &= ~0xfff;
6557 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6558 
6559 		orig = data = RREG32(UVD_CGC_CTRL);
6560 		data &= ~DCM;
6561 		if (orig != data)
6562 			WREG32(UVD_CGC_CTRL, data);
6563 	}
6564 }
6565 
6566 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6567 			       bool enable)
6568 {
6569 	u32 orig, data;
6570 
6571 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6572 
6573 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6574 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6575 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6576 	else
6577 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6578 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6579 
6580 	if (orig != data)
6581 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6582 }
6583 
6584 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6585 				bool enable)
6586 {
6587 	u32 orig, data;
6588 
6589 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6590 
6591 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6592 		data &= ~CLOCK_GATING_DIS;
6593 	else
6594 		data |= CLOCK_GATING_DIS;
6595 
6596 	if (orig != data)
6597 		WREG32(HDP_HOST_PATH_CNTL, data);
6598 }
6599 
6600 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6601 			      bool enable)
6602 {
6603 	u32 orig, data;
6604 
6605 	orig = data = RREG32(HDP_MEM_POWER_LS);
6606 
6607 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6608 		data |= HDP_LS_ENABLE;
6609 	else
6610 		data &= ~HDP_LS_ENABLE;
6611 
6612 	if (orig != data)
6613 		WREG32(HDP_MEM_POWER_LS, data);
6614 }
6615 
6616 void cik_update_cg(struct radeon_device *rdev,
6617 		   u32 block, bool enable)
6618 {
6619 
6620 	if (block & RADEON_CG_BLOCK_GFX) {
6621 		cik_enable_gui_idle_interrupt(rdev, false);
6622 		/* order matters! */
6623 		if (enable) {
6624 			cik_enable_mgcg(rdev, true);
6625 			cik_enable_cgcg(rdev, true);
6626 		} else {
6627 			cik_enable_cgcg(rdev, false);
6628 			cik_enable_mgcg(rdev, false);
6629 		}
6630 		cik_enable_gui_idle_interrupt(rdev, true);
6631 	}
6632 
6633 	if (block & RADEON_CG_BLOCK_MC) {
6634 		if (!(rdev->flags & RADEON_IS_IGP)) {
6635 			cik_enable_mc_mgcg(rdev, enable);
6636 			cik_enable_mc_ls(rdev, enable);
6637 		}
6638 	}
6639 
6640 	if (block & RADEON_CG_BLOCK_SDMA) {
6641 		cik_enable_sdma_mgcg(rdev, enable);
6642 		cik_enable_sdma_mgls(rdev, enable);
6643 	}
6644 
6645 	if (block & RADEON_CG_BLOCK_BIF) {
6646 		cik_enable_bif_mgls(rdev, enable);
6647 	}
6648 
6649 	if (block & RADEON_CG_BLOCK_UVD) {
6650 		if (rdev->has_uvd)
6651 			cik_enable_uvd_mgcg(rdev, enable);
6652 	}
6653 
6654 	if (block & RADEON_CG_BLOCK_HDP) {
6655 		cik_enable_hdp_mgcg(rdev, enable);
6656 		cik_enable_hdp_ls(rdev, enable);
6657 	}
6658 
6659 	if (block & RADEON_CG_BLOCK_VCE) {
6660 		vce_v2_0_enable_mgcg(rdev, enable);
6661 	}
6662 }
6663 
6664 static void cik_init_cg(struct radeon_device *rdev)
6665 {
6666 
6667 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6668 
6669 	if (rdev->has_uvd)
6670 		si_init_uvd_internal_cg(rdev);
6671 
6672 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6673 			     RADEON_CG_BLOCK_SDMA |
6674 			     RADEON_CG_BLOCK_BIF |
6675 			     RADEON_CG_BLOCK_UVD |
6676 			     RADEON_CG_BLOCK_HDP), true);
6677 }
6678 
6679 static void cik_fini_cg(struct radeon_device *rdev)
6680 {
6681 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6682 			     RADEON_CG_BLOCK_SDMA |
6683 			     RADEON_CG_BLOCK_BIF |
6684 			     RADEON_CG_BLOCK_UVD |
6685 			     RADEON_CG_BLOCK_HDP), false);
6686 
6687 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6688 }
6689 
6690 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6691 					  bool enable)
6692 {
6693 	u32 data, orig;
6694 
6695 	orig = data = RREG32(RLC_PG_CNTL);
6696 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6697 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6698 	else
6699 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6700 	if (orig != data)
6701 		WREG32(RLC_PG_CNTL, data);
6702 }
6703 
6704 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6705 					  bool enable)
6706 {
6707 	u32 data, orig;
6708 
6709 	orig = data = RREG32(RLC_PG_CNTL);
6710 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6711 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6712 	else
6713 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6714 	if (orig != data)
6715 		WREG32(RLC_PG_CNTL, data);
6716 }
6717 
6718 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6719 {
6720 	u32 data, orig;
6721 
6722 	orig = data = RREG32(RLC_PG_CNTL);
6723 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6724 		data &= ~DISABLE_CP_PG;
6725 	else
6726 		data |= DISABLE_CP_PG;
6727 	if (orig != data)
6728 		WREG32(RLC_PG_CNTL, data);
6729 }
6730 
6731 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6732 {
6733 	u32 data, orig;
6734 
6735 	orig = data = RREG32(RLC_PG_CNTL);
6736 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6737 		data &= ~DISABLE_GDS_PG;
6738 	else
6739 		data |= DISABLE_GDS_PG;
6740 	if (orig != data)
6741 		WREG32(RLC_PG_CNTL, data);
6742 }
6743 
6744 #define CP_ME_TABLE_SIZE    96
6745 #define CP_ME_TABLE_OFFSET  2048
6746 #define CP_MEC_TABLE_OFFSET 4096
6747 
6748 void cik_init_cp_pg_table(struct radeon_device *rdev)
6749 {
6750 	volatile u32 *dst_ptr;
6751 	int me, i, max_me = 4;
6752 	u32 bo_offset = 0;
6753 	u32 table_offset, table_size;
6754 
6755 	if (rdev->family == CHIP_KAVERI)
6756 		max_me = 5;
6757 
6758 	if (rdev->rlc.cp_table_ptr == NULL)
6759 		return;
6760 
6761 	/* write the cp table buffer */
6762 	dst_ptr = rdev->rlc.cp_table_ptr;
6763 	for (me = 0; me < max_me; me++) {
6764 		if (rdev->new_fw) {
6765 			const __le32 *fw_data;
6766 			const struct gfx_firmware_header_v1_0 *hdr;
6767 
6768 			if (me == 0) {
6769 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6770 				fw_data = (const __le32 *)
6771 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6772 				table_offset = le32_to_cpu(hdr->jt_offset);
6773 				table_size = le32_to_cpu(hdr->jt_size);
6774 			} else if (me == 1) {
6775 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6776 				fw_data = (const __le32 *)
6777 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6778 				table_offset = le32_to_cpu(hdr->jt_offset);
6779 				table_size = le32_to_cpu(hdr->jt_size);
6780 			} else if (me == 2) {
6781 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6782 				fw_data = (const __le32 *)
6783 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6784 				table_offset = le32_to_cpu(hdr->jt_offset);
6785 				table_size = le32_to_cpu(hdr->jt_size);
6786 			} else if (me == 3) {
6787 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6788 				fw_data = (const __le32 *)
6789 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6790 				table_offset = le32_to_cpu(hdr->jt_offset);
6791 				table_size = le32_to_cpu(hdr->jt_size);
6792 			} else {
6793 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6794 				fw_data = (const __le32 *)
6795 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6796 				table_offset = le32_to_cpu(hdr->jt_offset);
6797 				table_size = le32_to_cpu(hdr->jt_size);
6798 			}
6799 
6800 			for (i = 0; i < table_size; i++) {
6801 				dst_ptr[bo_offset + i] =
6802 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6803 			}
6804 			bo_offset += table_size;
6805 		} else {
6806 			const __be32 *fw_data;
6807 			table_size = CP_ME_TABLE_SIZE;
6808 
6809 			if (me == 0) {
6810 				fw_data = (const __be32 *)rdev->ce_fw->data;
6811 				table_offset = CP_ME_TABLE_OFFSET;
6812 			} else if (me == 1) {
6813 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6814 				table_offset = CP_ME_TABLE_OFFSET;
6815 			} else if (me == 2) {
6816 				fw_data = (const __be32 *)rdev->me_fw->data;
6817 				table_offset = CP_ME_TABLE_OFFSET;
6818 			} else {
6819 				fw_data = (const __be32 *)rdev->mec_fw->data;
6820 				table_offset = CP_MEC_TABLE_OFFSET;
6821 			}
6822 
6823 			for (i = 0; i < table_size; i++) {
6824 				dst_ptr[bo_offset + i] =
6825 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6826 			}
6827 			bo_offset += table_size;
6828 		}
6829 	}
6830 }
6831 
6832 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6833 				bool enable)
6834 {
6835 	u32 data, orig;
6836 
6837 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6838 		orig = data = RREG32(RLC_PG_CNTL);
6839 		data |= GFX_PG_ENABLE;
6840 		if (orig != data)
6841 			WREG32(RLC_PG_CNTL, data);
6842 
6843 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6844 		data |= AUTO_PG_EN;
6845 		if (orig != data)
6846 			WREG32(RLC_AUTO_PG_CTRL, data);
6847 	} else {
6848 		orig = data = RREG32(RLC_PG_CNTL);
6849 		data &= ~GFX_PG_ENABLE;
6850 		if (orig != data)
6851 			WREG32(RLC_PG_CNTL, data);
6852 
6853 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6854 		data &= ~AUTO_PG_EN;
6855 		if (orig != data)
6856 			WREG32(RLC_AUTO_PG_CTRL, data);
6857 
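		/* the value is unused; presumably a posting read to make
		 * sure the powergating disable has taken effect
		 */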
6858 		data = RREG32(DB_RENDER_CONTROL);
6859 	}
6860 }
6861 
6862 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6863 {
6864 	u32 mask = 0, tmp, tmp1;
6865 	int i;
6866 
6867 	mutex_lock(&rdev->grbm_idx_mutex);
6868 	cik_select_se_sh(rdev, se, sh);
6869 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6870 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6871 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6872 	mutex_unlock(&rdev->grbm_idx_mutex);
6873 
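	/* the inactive (harvested) CU bits live in the upper 16 bits of
	 * both registers; merge them and shift down so bit N maps to CU N,
	 * then invert and mask to max_cu_per_sh bits below to get the
	 * active CU bitmap
	 */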
6874 	tmp &= 0xffff0000;
6875 
6876 	tmp |= tmp1;
6877 	tmp >>= 16;
6878 
6879 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6880 		mask <<= 1;
6881 		mask |= 1;
6882 	}
6883 
6884 	return (~tmp) & mask;
6885 }
6886 
6887 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6888 {
6889 	u32 i, j, k, active_cu_number = 0;
6890 	u32 mask, counter, cu_bitmap;
6891 	u32 tmp = 0;
6892 
6893 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6894 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6895 			mask = 1;
6896 			cu_bitmap = 0;
6897 			counter = 0;
6898 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6899 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6900 					if (counter < 2)
6901 						cu_bitmap |= mask;
6902 					counter++;
6903 				}
6904 				mask <<= 1;
6905 			}
6906 
6907 			active_cu_number += counter;
6908 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6909 		}
6910 	}
6911 
6912 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6913 
6914 	tmp = RREG32(RLC_MAX_PG_CU);
6915 	tmp &= ~MAX_PU_CU_MASK;
6916 	tmp |= MAX_PU_CU(active_cu_number);
6917 	WREG32(RLC_MAX_PG_CU, tmp);
6918 }
6919 
6920 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6921 				       bool enable)
6922 {
6923 	u32 data, orig;
6924 
6925 	orig = data = RREG32(RLC_PG_CNTL);
6926 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6927 		data |= STATIC_PER_CU_PG_ENABLE;
6928 	else
6929 		data &= ~STATIC_PER_CU_PG_ENABLE;
6930 	if (orig != data)
6931 		WREG32(RLC_PG_CNTL, data);
6932 }
6933 
6934 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6935 					bool enable)
6936 {
6937 	u32 data, orig;
6938 
6939 	orig = data = RREG32(RLC_PG_CNTL);
6940 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6941 		data |= DYN_PER_CU_PG_ENABLE;
6942 	else
6943 		data &= ~DYN_PER_CU_PG_ENABLE;
6944 	if (orig != data)
6945 		WREG32(RLC_PG_CNTL, data);
6946 }
6947 
6948 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6949 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6950 
6951 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6952 {
6953 	u32 data, orig;
6954 	u32 i;
6955 
6956 	if (rdev->rlc.cs_data) {
6957 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6958 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6959 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6960 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6961 	} else {
6962 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6963 		for (i = 0; i < 3; i++)
6964 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6965 	}
6966 	if (rdev->rlc.reg_list) {
6967 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6968 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6969 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6970 	}
6971 
6972 	orig = data = RREG32(RLC_PG_CNTL);
6973 	data |= GFX_PG_SRC;
6974 	if (orig != data)
6975 		WREG32(RLC_PG_CNTL, data);
6976 
6977 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6978 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6979 
6980 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6981 	data &= ~IDLE_POLL_COUNT_MASK;
6982 	data |= IDLE_POLL_COUNT(0x60);
6983 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6984 
6985 	data = 0x10101010;
6986 	WREG32(RLC_PG_DELAY, data);
6987 
6988 	data = RREG32(RLC_PG_DELAY_2);
6989 	data &= ~0xff;
6990 	data |= 0x3;
6991 	WREG32(RLC_PG_DELAY_2, data);
6992 
6993 	data = RREG32(RLC_AUTO_PG_CTRL);
6994 	data &= ~GRBM_REG_SGIT_MASK;
6995 	data |= GRBM_REG_SGIT(0x700);
6996 	WREG32(RLC_AUTO_PG_CTRL, data);
6998 }
6999 
7000 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7001 {
7002 	cik_enable_gfx_cgpg(rdev, enable);
7003 	cik_enable_gfx_static_mgpg(rdev, enable);
7004 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
7005 }
7006 
7007 u32 cik_get_csb_size(struct radeon_device *rdev)
7008 {
7009 	u32 count = 0;
7010 	const struct cs_section_def *sect = NULL;
7011 	const struct cs_extent_def *ext = NULL;
7012 
7013 	if (rdev->rlc.cs_data == NULL)
7014 		return 0;
7015 
7016 	/* begin clear state */
7017 	count += 2;
7018 	/* context control state */
7019 	count += 3;
7020 
7021 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7022 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7023 			if (sect->id == SECT_CONTEXT)
7024 				count += 2 + ext->reg_count;
7025 			else
7026 				return 0;
7027 		}
7028 	}
7029 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7030 	count += 4;
7031 	/* end clear state */
7032 	count += 2;
7033 	/* clear state */
7034 	count += 2;
7035 
7036 	return count;
7037 }
7038 
7039 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7040 {
7041 	u32 count = 0, i;
7042 	const struct cs_section_def *sect = NULL;
7043 	const struct cs_extent_def *ext = NULL;
7044 
7045 	if (rdev->rlc.cs_data == NULL)
7046 		return;
7047 	if (buffer == NULL)
7048 		return;
7049 
7050 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7051 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7052 
7053 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7054 	buffer[count++] = cpu_to_le32(0x80000000);
7055 	buffer[count++] = cpu_to_le32(0x80000000);
7056 
7057 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7058 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7059 			if (sect->id == SECT_CONTEXT) {
7060 				buffer[count++] =
7061 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7062 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7063 				for (i = 0; i < ext->reg_count; i++)
7064 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7065 			} else {
7066 				return;
7067 			}
7068 		}
7069 	}
7070 
7071 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7072 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7073 	switch (rdev->family) {
7074 	case CHIP_BONAIRE:
7075 		buffer[count++] = cpu_to_le32(0x16000012);
7076 		buffer[count++] = cpu_to_le32(0x00000000);
7077 		break;
7078 	case CHIP_KAVERI:
7079 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7080 		buffer[count++] = cpu_to_le32(0x00000000);
7081 		break;
7082 	case CHIP_KABINI:
7083 	case CHIP_MULLINS:
7084 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7085 		buffer[count++] = cpu_to_le32(0x00000000);
7086 		break;
7087 	case CHIP_HAWAII:
7088 		buffer[count++] = cpu_to_le32(0x3a00161a);
7089 		buffer[count++] = cpu_to_le32(0x0000002e);
7090 		break;
7091 	default:
7092 		buffer[count++] = cpu_to_le32(0x00000000);
7093 		buffer[count++] = cpu_to_le32(0x00000000);
7094 		break;
7095 	}
7096 
7097 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7098 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7099 
7100 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7101 	buffer[count++] = cpu_to_le32(0);
7102 }
7103 
7104 static void cik_init_pg(struct radeon_device *rdev)
7105 {
7106 	if (rdev->pg_flags) {
7107 		cik_enable_sck_slowdown_on_pu(rdev, true);
7108 		cik_enable_sck_slowdown_on_pd(rdev, true);
7109 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7110 			cik_init_gfx_cgpg(rdev);
7111 			cik_enable_cp_pg(rdev, true);
7112 			cik_enable_gds_pg(rdev, true);
7113 		}
7114 		cik_init_ao_cu_mask(rdev);
7115 		cik_update_gfx_pg(rdev, true);
7116 	}
7117 }
7118 
7119 static void cik_fini_pg(struct radeon_device *rdev)
7120 {
7121 	if (rdev->pg_flags) {
7122 		cik_update_gfx_pg(rdev, false);
7123 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7124 			cik_enable_cp_pg(rdev, false);
7125 			cik_enable_gds_pg(rdev, false);
7126 		}
7127 	}
7128 }
7129 
7130 /*
7131  * Interrupts
7132  * Starting with r6xx, interrupts are handled via a ring buffer.
7133  * Ring buffers are areas of GPU accessible memory that the GPU
7134  * writes interrupt vectors into and the host reads vectors out of.
7135  * There is a rptr (read pointer) that determines where the
7136  * host is currently reading, and a wptr (write pointer)
7137  * which determines where the GPU has written.  When the
7138  * pointers are equal, the ring is idle.  When the GPU
7139  * writes vectors to the ring buffer, it increments the
7140  * wptr.  When there is an interrupt, the host then starts
7141  * fetching commands and processing them until the pointers are
7142  * equal again at which point it updates the rptr.
7143  */
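
/* A minimal sketch of the consumer side of such a ring, assuming
 * hypothetical read_wptr() and process_vector() helpers standing in
 * for the register/writeback read and the per-vector handling that
 * cik_irq_process() does below:
 *
 *	u32 rptr = ih->rptr;
 *	u32 wptr = read_wptr(ih);	(register or writeback slot)
 *	while (rptr != wptr) {
 *		process_vector(&ih->ring[rptr / 4]);
 *		rptr = (rptr + 16) & ih->ptr_mask;	(16-byte vectors)
 *	}
 *	WREG32(IH_RB_RPTR, rptr);	(hand the slots back to the GPU)
 *	ih->rptr = rptr;
 */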
7144 
7145 /**
7146  * cik_enable_interrupts - Enable the interrupt ring buffer
7147  *
7148  * @rdev: radeon_device pointer
7149  *
7150  * Enable the interrupt ring buffer (CIK).
7151  */
7152 static void cik_enable_interrupts(struct radeon_device *rdev)
7153 {
7154 	u32 ih_cntl = RREG32(IH_CNTL);
7155 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7156 
7157 	ih_cntl |= ENABLE_INTR;
7158 	ih_rb_cntl |= IH_RB_ENABLE;
7159 	WREG32(IH_CNTL, ih_cntl);
7160 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7161 	rdev->ih.enabled = true;
7162 }
7163 
7164 /**
7165  * cik_disable_interrupts - Disable the interrupt ring buffer
7166  *
7167  * @rdev: radeon_device pointer
7168  *
7169  * Disable the interrupt ring buffer (CIK).
7170  */
7171 static void cik_disable_interrupts(struct radeon_device *rdev)
7172 {
7173 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7174 	u32 ih_cntl = RREG32(IH_CNTL);
7175 
7176 	ih_rb_cntl &= ~IH_RB_ENABLE;
7177 	ih_cntl &= ~ENABLE_INTR;
7178 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7179 	WREG32(IH_CNTL, ih_cntl);
7180 	/* set rptr, wptr to 0 */
7181 	WREG32(IH_RB_RPTR, 0);
7182 	WREG32(IH_RB_WPTR, 0);
7183 	rdev->ih.enabled = false;
7184 	rdev->ih.rptr = 0;
7185 }
7186 
7187 /**
7188  * cik_disable_interrupt_state - Disable all interrupt sources
7189  *
7190  * @rdev: radeon_device pointer
7191  *
7192  * Clear all interrupt enable bits used by the driver (CIK).
7193  */
7194 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7195 {
7196 	u32 tmp;
7197 
7198 	/* gfx ring */
7199 	tmp = RREG32(CP_INT_CNTL_RING0) &
7200 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7201 	WREG32(CP_INT_CNTL_RING0, tmp);
7202 	/* sdma */
7203 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7204 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7205 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7206 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7207 	/* compute queues */
7208 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7209 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7210 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7211 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7212 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7213 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7214 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7215 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7216 	/* grbm */
7217 	WREG32(GRBM_INT_CNTL, 0);
7218 	/* vline/vblank, etc. */
7219 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7220 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7221 	if (rdev->num_crtc >= 4) {
7222 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7223 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7224 	}
7225 	if (rdev->num_crtc >= 6) {
7226 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7227 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7228 	}
7229 	/* pflip */
7230 	if (rdev->num_crtc >= 2) {
7231 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7232 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7233 	}
7234 	if (rdev->num_crtc >= 4) {
7235 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7236 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7237 	}
7238 	if (rdev->num_crtc >= 6) {
7239 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7240 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7241 	}
7242 
7243 	/* dac hotplug */
7244 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7245 
7246 	/* digital hotplug */
7247 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7248 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7249 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7250 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7251 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7252 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7253 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7254 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7255 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7256 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7257 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7258 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7260 }
7261 
7262 /**
7263  * cik_irq_init - init and enable the interrupt ring
7264  *
7265  * @rdev: radeon_device pointer
7266  *
7267  * Allocate a ring buffer for the interrupt controller,
7268  * enable the RLC, disable interrupts, enable the IH
7269  * ring buffer and enable it (CIK).
7270  * Called at device load and resume.
7271  * Returns 0 for success, errors for failure.
7272  */
7273 static int cik_irq_init(struct radeon_device *rdev)
7274 {
7275 	int ret = 0;
7276 	int rb_bufsz;
7277 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7278 
7279 	/* allocate ring */
7280 	ret = r600_ih_ring_alloc(rdev);
7281 	if (ret)
7282 		return ret;
7283 
7284 	/* disable irqs */
7285 	cik_disable_interrupts(rdev);
7286 
7287 	/* init rlc */
7288 	ret = cik_rlc_resume(rdev);
7289 	if (ret) {
7290 		r600_ih_ring_fini(rdev);
7291 		return ret;
7292 	}
7293 
7294 	/* setup interrupt control */
7295 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7296 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7297 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7298 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7299 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7300 	 */
7301 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7302 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7303 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7304 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7305 
7306 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7307 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7308 
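	/* the ring size is programmed as log2 of the size in dwords; the
	 * size field sits just above the RB enable bit, hence the << 1
	 */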
7309 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7310 		      IH_WPTR_OVERFLOW_CLEAR |
7311 		      (rb_bufsz << 1));
7312 
7313 	if (rdev->wb.enabled)
7314 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7315 
7316 	/* set the writeback address whether it's enabled or not */
7317 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7318 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7319 
7320 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7321 
7322 	/* set rptr, wptr to 0 */
7323 	WREG32(IH_RB_RPTR, 0);
7324 	WREG32(IH_RB_WPTR, 0);
7325 
7326 	/* Default settings for IH_CNTL (disabled at first) */
7327 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7328 	/* RPTR_REARM only works if msi's are enabled */
7329 	if (rdev->msi_enabled)
7330 		ih_cntl |= RPTR_REARM;
7331 	WREG32(IH_CNTL, ih_cntl);
7332 
7333 	/* force the active interrupt state to all disabled */
7334 	cik_disable_interrupt_state(rdev);
7335 
7336 	pci_set_master(rdev->pdev);
7337 
7338 	/* enable irqs */
7339 	cik_enable_interrupts(rdev);
7340 
7341 	return ret;
7342 }
7343 
7344 /**
7345  * cik_irq_set - enable/disable interrupt sources
7346  *
7347  * @rdev: radeon_device pointer
7348  *
7349  * Enable interrupt sources on the GPU (vblanks, hpd,
7350  * etc.) (CIK).
7351  * Returns 0 for success, errors for failure.
7352  */
7353 int cik_irq_set(struct radeon_device *rdev)
7354 {
7355 	u32 cp_int_cntl;
7356 	u32 cp_m1p0;
7357 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7358 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7359 	u32 grbm_int_cntl = 0;
7360 	u32 dma_cntl, dma_cntl1;
7361 	u32 thermal_int;
7362 
7363 	if (!rdev->irq.installed) {
7364 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7365 		return -EINVAL;
7366 	}
7367 	/* don't enable anything if the ih is disabled */
7368 	if (!rdev->ih.enabled) {
7369 		cik_disable_interrupts(rdev);
7370 		/* force the active interrupt state to all disabled */
7371 		cik_disable_interrupt_state(rdev);
7372 		return 0;
7373 	}
7374 
7375 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7376 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7377 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7378 
7379 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7380 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7381 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7382 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7383 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7384 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7385 
7386 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7387 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7388 
7389 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7390 
7391 	if (rdev->flags & RADEON_IS_IGP)
7392 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7393 			~(THERM_INTH_MASK | THERM_INTL_MASK);
7394 	else
7395 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7396 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7397 
7398 	/* enable CP interrupts on all rings */
7399 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7400 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7401 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7402 	}
7403 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7404 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7405 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7406 		if (ring->me == 1) {
7407 			switch (ring->pipe) {
7408 			case 0:
7409 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7410 				break;
7411 			default:
7412 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7413 				break;
7414 			}
7415 		} else {
7416 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7417 		}
7418 	}
7419 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7420 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7421 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7422 		if (ring->me == 1) {
7423 			switch (ring->pipe) {
7424 			case 0:
7425 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7426 				break;
7427 			default:
7428 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7429 				break;
7430 			}
7431 		} else {
7432 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7433 		}
7434 	}
7435 
7436 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7437 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7438 		dma_cntl |= TRAP_ENABLE;
7439 	}
7440 
7441 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7442 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7443 		dma_cntl1 |= TRAP_ENABLE;
7444 	}
7445 
7446 	if (rdev->irq.crtc_vblank_int[0] ||
7447 	    atomic_read(&rdev->irq.pflip[0])) {
7448 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7449 		crtc1 |= VBLANK_INTERRUPT_MASK;
7450 	}
7451 	if (rdev->irq.crtc_vblank_int[1] ||
7452 	    atomic_read(&rdev->irq.pflip[1])) {
7453 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7454 		crtc2 |= VBLANK_INTERRUPT_MASK;
7455 	}
7456 	if (rdev->irq.crtc_vblank_int[2] ||
7457 	    atomic_read(&rdev->irq.pflip[2])) {
7458 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7459 		crtc3 |= VBLANK_INTERRUPT_MASK;
7460 	}
7461 	if (rdev->irq.crtc_vblank_int[3] ||
7462 	    atomic_read(&rdev->irq.pflip[3])) {
7463 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7464 		crtc4 |= VBLANK_INTERRUPT_MASK;
7465 	}
7466 	if (rdev->irq.crtc_vblank_int[4] ||
7467 	    atomic_read(&rdev->irq.pflip[4])) {
7468 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7469 		crtc5 |= VBLANK_INTERRUPT_MASK;
7470 	}
7471 	if (rdev->irq.crtc_vblank_int[5] ||
7472 	    atomic_read(&rdev->irq.pflip[5])) {
7473 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7474 		crtc6 |= VBLANK_INTERRUPT_MASK;
7475 	}
7476 	if (rdev->irq.hpd[0]) {
7477 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7478 		hpd1 |= DC_HPDx_INT_EN;
7479 	}
7480 	if (rdev->irq.hpd[1]) {
7481 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7482 		hpd2 |= DC_HPDx_INT_EN;
7483 	}
7484 	if (rdev->irq.hpd[2]) {
7485 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7486 		hpd3 |= DC_HPDx_INT_EN;
7487 	}
7488 	if (rdev->irq.hpd[3]) {
7489 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7490 		hpd4 |= DC_HPDx_INT_EN;
7491 	}
7492 	if (rdev->irq.hpd[4]) {
7493 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7494 		hpd5 |= DC_HPDx_INT_EN;
7495 	}
7496 	if (rdev->irq.hpd[5]) {
7497 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7498 		hpd6 |= DC_HPDx_INT_EN;
7499 	}
7500 
7501 	if (rdev->irq.dpm_thermal) {
7502 		DRM_DEBUG("dpm thermal\n");
7503 		if (rdev->flags & RADEON_IS_IGP)
7504 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7505 		else
7506 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7507 	}
7508 
7509 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7510 
7511 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7512 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7513 
7514 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7515 
7516 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7517 
7518 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7519 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7520 	if (rdev->num_crtc >= 4) {
7521 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7522 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7523 	}
7524 	if (rdev->num_crtc >= 6) {
7525 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7526 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7527 	}
7528 
7529 	if (rdev->num_crtc >= 2) {
7530 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7531 		       GRPH_PFLIP_INT_MASK);
7532 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7533 		       GRPH_PFLIP_INT_MASK);
7534 	}
7535 	if (rdev->num_crtc >= 4) {
7536 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7537 		       GRPH_PFLIP_INT_MASK);
7538 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7539 		       GRPH_PFLIP_INT_MASK);
7540 	}
7541 	if (rdev->num_crtc >= 6) {
7542 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7543 		       GRPH_PFLIP_INT_MASK);
7544 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7545 		       GRPH_PFLIP_INT_MASK);
7546 	}
7547 
7548 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7549 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7550 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7551 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7552 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7553 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7554 
7555 	if (rdev->flags & RADEON_IS_IGP)
7556 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7557 	else
7558 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7559 
7560 	return 0;
7561 }
7562 
7563 /**
7564  * cik_irq_ack - ack interrupt sources
7565  *
7566  * @rdev: radeon_device pointer
7567  *
7568  * Ack interrupt sources on the GPU (vblanks, hpd,
7569  * etc.) (CIK).  Certain interrupts sources are sw
7570  * etc.) (CIK).  Certain interrupt sources are sw
7571  */
7572 static inline void cik_irq_ack(struct radeon_device *rdev)
7573 {
7574 	u32 tmp;
7575 
7576 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7577 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7578 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7579 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7580 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7581 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7582 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7583 
7584 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7585 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7586 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7587 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7588 	if (rdev->num_crtc >= 4) {
7589 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7590 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7591 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7592 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7593 	}
7594 	if (rdev->num_crtc >= 6) {
7595 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7596 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7597 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7598 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7599 	}
7600 
7601 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7602 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7603 		       GRPH_PFLIP_INT_CLEAR);
7604 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7605 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7606 		       GRPH_PFLIP_INT_CLEAR);
7607 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7608 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7609 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7610 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7611 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7612 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7613 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7614 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7615 
7616 	if (rdev->num_crtc >= 4) {
7617 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7618 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7619 			       GRPH_PFLIP_INT_CLEAR);
7620 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7621 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7622 			       GRPH_PFLIP_INT_CLEAR);
7623 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7624 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7625 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7626 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7627 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7628 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7629 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7630 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7631 	}
7632 
7633 	if (rdev->num_crtc >= 6) {
7634 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7635 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7636 			       GRPH_PFLIP_INT_CLEAR);
7637 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7638 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7639 			       GRPH_PFLIP_INT_CLEAR);
7640 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7641 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7642 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7643 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7644 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7645 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7646 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7647 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7648 	}
7649 
7650 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7651 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7652 		tmp |= DC_HPDx_INT_ACK;
7653 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7654 	}
7655 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7656 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7657 		tmp |= DC_HPDx_INT_ACK;
7658 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7659 	}
7660 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7661 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7662 		tmp |= DC_HPDx_INT_ACK;
7663 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7664 	}
7665 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7666 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7667 		tmp |= DC_HPDx_INT_ACK;
7668 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7669 	}
7670 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7671 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7672 		tmp |= DC_HPDx_INT_ACK;
7673 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7674 	}
7675 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7676 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7677 		tmp |= DC_HPDx_INT_ACK;
7678 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7679 	}
7680 }
7681 
7682 /**
7683  * cik_irq_disable - disable interrupts
7684  *
7685  * @rdev: radeon_device pointer
7686  *
7687  * Disable interrupts on the hw (CIK).
7688  */
7689 static void cik_irq_disable(struct radeon_device *rdev)
7690 {
7691 	cik_disable_interrupts(rdev);
7692 	/* Wait and acknowledge irq */
7693 	mdelay(1);
7694 	cik_irq_ack(rdev);
7695 	cik_disable_interrupt_state(rdev);
7696 }
7697 
7698 /**
7699  * cik_irq_suspend - disable interrupts for suspend
7700  *
7701  * @rdev: radeon_device pointer
7702  *
7703  * Disable interrupts and stop the RLC (CIK).
7704  * Used for suspend.
7705  */
7706 static void cik_irq_suspend(struct radeon_device *rdev)
7707 {
7708 	cik_irq_disable(rdev);
7709 	cik_rlc_stop(rdev);
7710 }
7711 
7712 /**
7713  * cik_irq_fini - tear down interrupt support
7714  *
7715  * @rdev: radeon_device pointer
7716  *
7717  * Disable interrupts on the hw and free the IH ring
7718  * buffer (CIK).
7719  * Used for driver unload.
7720  */
7721 static void cik_irq_fini(struct radeon_device *rdev)
7722 {
7723 	cik_irq_suspend(rdev);
7724 	r600_ih_ring_fini(rdev);
7725 }
7726 
7727 /**
7728  * cik_get_ih_wptr - get the IH ring buffer wptr
7729  *
7730  * @rdev: radeon_device pointer
7731  *
7732  * Get the IH ring buffer wptr from either the register
7733  * or the writeback memory buffer (CIK).  Also check for
7734  * ring buffer overflow and deal with it.
7735  * Used by cik_irq_process().
7736  * Returns the value of the wptr.
7737  */
7738 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7739 {
7740 	u32 wptr, tmp;
7741 
7742 	if (rdev->wb.enabled)
7743 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7744 	else
7745 		wptr = RREG32(IH_RB_WPTR);
7746 
7747 	if (wptr & RB_OVERFLOW) {
7748 		wptr &= ~RB_OVERFLOW;
7749 		/* When a ring buffer overflow happens, start parsing interrupts
7750 		 * from the last vector that was not overwritten (wptr + 16);
7751 		 * this should allow us to catch up.
7752 		 */
7753 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7754 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7755 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7756 		tmp = RREG32(IH_RB_CNTL);
7757 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7758 		WREG32(IH_RB_CNTL, tmp);
7759 	}
7760 	return (wptr & rdev->ih.ptr_mask);
7761 }
7762 
7763 /* CIK IV Ring
7764  * Each IV ring entry is 128 bits:
7765  * [7:0]    - interrupt source id
7766  * [31:8]   - reserved
7767  * [59:32]  - interrupt source data
7768  * [63:60]  - reserved
7769  * [71:64]  - RINGID
7770  *            CP:
7771  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7772  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7773  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7774  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7775  *            PIPE_ID - ME0 0=3D
7776  *                    - ME1&2 compute dispatcher (4 pipes each)
7777  *            SDMA:
7778  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7779  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7780  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7781  * [79:72]  - VMID
7782  * [95:80]  - PASID
7783  * [127:96] - reserved
7784  */
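
/* A sketch of decoding one entry per the layout above, assuming ring[]
 * points at the entry as little-endian dwords (the actual decode of
 * src_id/src_data/ring_id is inlined in cik_irq_process() below):
 *
 *	src_id   = le32_to_cpu(ring[0]) & 0xff;
 *	src_data = le32_to_cpu(ring[1]) & 0xfffffff;
 *	ring_id  = le32_to_cpu(ring[2]) & 0xff;
 *	vm_id    = (le32_to_cpu(ring[2]) >> 8) & 0xff;
 *	pasid    = le32_to_cpu(ring[2]) >> 16;
 */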
7785 /**
7786  * cik_irq_process - interrupt handler
7787  *
7788  * @rdev: radeon_device pointer
7789  *
7790  * Interrupt handler (CIK).  Walk the IH ring,
7791  * ack interrupts and schedule work to handle
7792  * interrupt events.
7793  * Returns irq process return code.
7794  */
7795 int cik_irq_process(struct radeon_device *rdev)
7796 {
7797 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7798 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7799 	u32 wptr;
7800 	u32 rptr;
7801 	u32 src_id, src_data, ring_id;
7802 	u8 me_id, pipe_id, queue_id;
7803 	u32 ring_index;
7804 	bool queue_hotplug = false;
7805 	bool queue_reset = false;
7806 	u32 addr, status, mc_client;
7807 	bool queue_thermal = false;
7808 
7809 	if (!rdev->ih.enabled || rdev->shutdown)
7810 		return IRQ_NONE;
7811 
7812 	wptr = cik_get_ih_wptr(rdev);
7813 
7814 restart_ih:
7815 	/* is somebody else already processing irqs? */
7816 	if (atomic_xchg(&rdev->ih.lock, 1))
7817 		return IRQ_NONE;
7818 
7819 	rptr = rdev->ih.rptr;
7820 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7821 
7822 	/* Order reading of wptr vs. reading of IH ring data */
7823 	rmb();
7824 
7825 	/* display interrupts */
7826 	cik_irq_ack(rdev);
7827 
7828 	while (rptr != wptr) {
7829 		/* wptr/rptr are in bytes! */
7830 		ring_index = rptr / 4;
7831 
7832 		radeon_kfd_interrupt(rdev,
7833 				(const void *) &rdev->ih.ring[ring_index]);
7834 
7835 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7836 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7837 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7838 
7839 		switch (src_id) {
7840 		case 1: /* D1 vblank/vline */
7841 			switch (src_data) {
7842 			case 0: /* D1 vblank */
7843 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7844 					if (rdev->irq.crtc_vblank_int[0]) {
7845 						drm_handle_vblank(rdev->ddev, 0);
7846 						rdev->pm.vblank_sync = true;
7847 						wake_up(&rdev->irq.vblank_queue);
7848 					}
7849 					if (atomic_read(&rdev->irq.pflip[0]))
7850 						radeon_crtc_handle_vblank(rdev, 0);
7851 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7852 					DRM_DEBUG("IH: D1 vblank\n");
7853 				}
7854 				break;
7855 			case 1: /* D1 vline */
7856 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7857 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7858 					DRM_DEBUG("IH: D1 vline\n");
7859 				}
7860 				break;
7861 			default:
7862 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7863 				break;
7864 			}
7865 			break;
7866 		case 2: /* D2 vblank/vline */
7867 			switch (src_data) {
7868 			case 0: /* D2 vblank */
7869 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7870 					if (rdev->irq.crtc_vblank_int[1]) {
7871 						drm_handle_vblank(rdev->ddev, 1);
7872 						rdev->pm.vblank_sync = true;
7873 						wake_up(&rdev->irq.vblank_queue);
7874 					}
7875 					if (atomic_read(&rdev->irq.pflip[1]))
7876 						radeon_crtc_handle_vblank(rdev, 1);
7877 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7878 					DRM_DEBUG("IH: D2 vblank\n");
7879 				}
7880 				break;
7881 			case 1: /* D2 vline */
7882 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7883 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7884 					DRM_DEBUG("IH: D2 vline\n");
7885 				}
7886 				break;
7887 			default:
7888 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7889 				break;
7890 			}
7891 			break;
7892 		case 3: /* D3 vblank/vline */
7893 			switch (src_data) {
7894 			case 0: /* D3 vblank */
7895 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7896 					if (rdev->irq.crtc_vblank_int[2]) {
7897 						drm_handle_vblank(rdev->ddev, 2);
7898 						rdev->pm.vblank_sync = true;
7899 						wake_up(&rdev->irq.vblank_queue);
7900 					}
7901 					if (atomic_read(&rdev->irq.pflip[2]))
7902 						radeon_crtc_handle_vblank(rdev, 2);
7903 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7904 					DRM_DEBUG("IH: D3 vblank\n");
7905 				}
7906 				break;
7907 			case 1: /* D3 vline */
7908 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7909 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7910 					DRM_DEBUG("IH: D3 vline\n");
7911 				}
7912 				break;
7913 			default:
7914 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7915 				break;
7916 			}
7917 			break;
7918 		case 4: /* D4 vblank/vline */
7919 			switch (src_data) {
7920 			case 0: /* D4 vblank */
7921 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7922 					if (rdev->irq.crtc_vblank_int[3]) {
7923 						drm_handle_vblank(rdev->ddev, 3);
7924 						rdev->pm.vblank_sync = true;
7925 						wake_up(&rdev->irq.vblank_queue);
7926 					}
7927 					if (atomic_read(&rdev->irq.pflip[3]))
7928 						radeon_crtc_handle_vblank(rdev, 3);
7929 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7930 					DRM_DEBUG("IH: D4 vblank\n");
7931 				}
7932 				break;
7933 			case 1: /* D4 vline */
7934 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7935 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7936 					DRM_DEBUG("IH: D4 vline\n");
7937 				}
7938 				break;
7939 			default:
7940 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7941 				break;
7942 			}
7943 			break;
7944 		case 5: /* D5 vblank/vline */
7945 			switch (src_data) {
7946 			case 0: /* D5 vblank */
7947 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7948 					if (rdev->irq.crtc_vblank_int[4]) {
7949 						drm_handle_vblank(rdev->ddev, 4);
7950 						rdev->pm.vblank_sync = true;
7951 						wake_up(&rdev->irq.vblank_queue);
7952 					}
7953 					if (atomic_read(&rdev->irq.pflip[4]))
7954 						radeon_crtc_handle_vblank(rdev, 4);
7955 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7956 					DRM_DEBUG("IH: D5 vblank\n");
7957 				}
7958 				break;
7959 			case 1: /* D5 vline */
7960 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7961 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7962 					DRM_DEBUG("IH: D5 vline\n");
7963 				}
7964 				break;
7965 			default:
7966 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7967 				break;
7968 			}
7969 			break;
7970 		case 6: /* D6 vblank/vline */
7971 			switch (src_data) {
7972 			case 0: /* D6 vblank */
7973 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7974 					if (rdev->irq.crtc_vblank_int[5]) {
7975 						drm_handle_vblank(rdev->ddev, 5);
7976 						rdev->pm.vblank_sync = true;
7977 						wake_up(&rdev->irq.vblank_queue);
7978 					}
7979 					if (atomic_read(&rdev->irq.pflip[5]))
7980 						radeon_crtc_handle_vblank(rdev, 5);
7981 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7982 					DRM_DEBUG("IH: D6 vblank\n");
7983 				}
7984 				break;
7985 			case 1: /* D6 vline */
7986 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7987 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7988 					DRM_DEBUG("IH: D6 vline\n");
7989 				}
7990 				break;
7991 			default:
7992 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7993 				break;
7994 			}
7995 			break;
7996 		case 8: /* D1 page flip */
7997 		case 10: /* D2 page flip */
7998 		case 12: /* D3 page flip */
7999 		case 14: /* D4 page flip */
8000 		case 16: /* D5 page flip */
8001 		case 18: /* D6 page flip */
8002 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8003 			if (radeon_use_pflipirq > 0)
8004 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8005 			break;
8006 		case 42: /* HPD hotplug */
8007 			switch (src_data) {
8008 			case 0:
8009 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8010 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8011 					queue_hotplug = true;
8012 					DRM_DEBUG("IH: HPD1\n");
8013 				}
8014 				break;
8015 			case 1:
8016 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8017 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8018 					queue_hotplug = true;
8019 					DRM_DEBUG("IH: HPD2\n");
8020 				}
8021 				break;
8022 			case 2:
8023 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8024 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8025 					queue_hotplug = true;
8026 					DRM_DEBUG("IH: HPD3\n");
8027 				}
8028 				break;
8029 			case 3:
8030 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8031 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8032 					queue_hotplug = true;
8033 					DRM_DEBUG("IH: HPD4\n");
8034 				}
8035 				break;
8036 			case 4:
8037 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8038 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8039 					queue_hotplug = true;
8040 					DRM_DEBUG("IH: HPD5\n");
8041 				}
8042 				break;
8043 			case 5:
8044 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8045 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8046 					queue_hotplug = true;
8047 					DRM_DEBUG("IH: HPD6\n");
8048 				}
8049 				break;
8050 			default:
8051 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8052 				break;
8053 			}
8054 			break;
8055 		case 124: /* UVD */
8056 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8057 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8058 			break;
8059 		case 146:
8060 		case 147:
8061 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8062 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8063 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8064 			/* reset addr and status */
8065 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8066 			if (addr == 0x0 && status == 0x0)
8067 				break;
8068 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8069 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8070 				addr);
8071 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8072 				status);
8073 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8074 			break;
8075 		case 167: /* VCE */
8076 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8077 			switch (src_data) {
8078 			case 0:
8079 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8080 				break;
8081 			case 1:
8082 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8083 				break;
8084 			default:
8085 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8086 				break;
8087 			}
8088 			break;
8089 		case 176: /* GFX RB CP_INT */
8090 		case 177: /* GFX IB CP_INT */
8091 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8092 			break;
8093 		case 181: /* CP EOP event */
8094 			DRM_DEBUG("IH: CP EOP\n");
8095 			/* XXX check the bitfield order! */
8096 			me_id = (ring_id & 0x60) >> 5;
8097 			pipe_id = (ring_id & 0x18) >> 3;
8098 			queue_id = (ring_id & 0x7) >> 0;
8099 			switch (me_id) {
8100 			case 0:
8101 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8102 				break;
8103 			case 1:
8104 			case 2:
8105 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8106 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8107 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8108 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8109 				break;
8110 			}
8111 			break;
8112 		case 184: /* CP Privileged reg access */
8113 			DRM_ERROR("Illegal register access in command stream\n");
8114 			/* XXX check the bitfield order! */
8115 			me_id = (ring_id & 0x60) >> 5;
8116 			pipe_id = (ring_id & 0x18) >> 3;
8117 			queue_id = (ring_id & 0x7) >> 0;
8118 			switch (me_id) {
8119 			case 0:
8120 				/* This results in a full GPU reset, but all we need to do is soft
8121 				 * reset the CP for gfx
8122 				 */
8123 				queue_reset = true;
8124 				break;
8125 			case 1:
8126 				/* XXX compute */
8127 				queue_reset = true;
8128 				break;
8129 			case 2:
8130 				/* XXX compute */
8131 				queue_reset = true;
8132 				break;
8133 			}
8134 			break;
8135 		case 185: /* CP Privileged inst */
8136 			DRM_ERROR("Illegal instruction in command stream\n");
8137 			/* XXX check the bitfield order! */
8138 			me_id = (ring_id & 0x60) >> 5;
8139 			pipe_id = (ring_id & 0x18) >> 3;
8140 			queue_id = (ring_id & 0x7) >> 0;
8141 			switch (me_id) {
8142 			case 0:
8143 				/* This results in a full GPU reset, but all we need to do is soft
8144 				 * reset the CP for gfx
8145 				 */
8146 				queue_reset = true;
8147 				break;
8148 			case 1:
8149 				/* XXX compute */
8150 				queue_reset = true;
8151 				break;
8152 			case 2:
8153 				/* XXX compute */
8154 				queue_reset = true;
8155 				break;
8156 			}
8157 			break;
8158 		case 224: /* SDMA trap event */
8159 			/* XXX check the bitfield order! */
8160 			me_id = (ring_id & 0x3) >> 0;
8161 			queue_id = (ring_id & 0xc) >> 2;
8162 			DRM_DEBUG("IH: SDMA trap\n");
8163 			switch (me_id) {
8164 			case 0:
8165 				switch (queue_id) {
8166 				case 0:
8167 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8168 					break;
8169 				case 1:
8170 					/* XXX compute */
8171 					break;
8172 				case 2:
8173 					/* XXX compute */
8174 					break;
8175 				}
8176 				break;
8177 			case 1:
8178 				switch (queue_id) {
8179 				case 0:
8180 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8181 					break;
8182 				case 1:
8183 					/* XXX compute */
8184 					break;
8185 				case 2:
8186 					/* XXX compute */
8187 					break;
8188 				}
8189 				break;
8190 			}
8191 			break;
8192 		case 230: /* thermal low to high */
8193 			DRM_DEBUG("IH: thermal low to high\n");
8194 			rdev->pm.dpm.thermal.high_to_low = false;
8195 			queue_thermal = true;
8196 			break;
8197 		case 231: /* thermal high to low */
8198 			DRM_DEBUG("IH: thermal high to low\n");
8199 			rdev->pm.dpm.thermal.high_to_low = true;
8200 			queue_thermal = true;
8201 			break;
8202 		case 233: /* GUI IDLE */
8203 			DRM_DEBUG("IH: GUI idle\n");
8204 			break;
8205 		case 241: /* SDMA Privileged inst */
8206 		case 247: /* SDMA Privileged inst */
8207 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8208 			/* XXX check the bitfield order! */
8209 			me_id = (ring_id & 0x3) >> 0;
8210 			queue_id = (ring_id & 0xc) >> 2;
8211 			switch (me_id) {
8212 			case 0:
8213 				switch (queue_id) {
8214 				case 0:
8215 					queue_reset = true;
8216 					break;
8217 				case 1:
8218 					/* XXX compute */
8219 					queue_reset = true;
8220 					break;
8221 				case 2:
8222 					/* XXX compute */
8223 					queue_reset = true;
8224 					break;
8225 				}
8226 				break;
8227 			case 1:
8228 				switch (queue_id) {
8229 				case 0:
8230 					queue_reset = true;
8231 					break;
8232 				case 1:
8233 					/* XXX compute */
8234 					queue_reset = true;
8235 					break;
8236 				case 2:
8237 					/* XXX compute */
8238 					queue_reset = true;
8239 					break;
8240 				}
8241 				break;
8242 			}
8243 			break;
8244 		default:
8245 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8246 			break;
8247 		}
8248 
8249 		/* wptr/rptr are in bytes! */
8250 		rptr += 16;
8251 		rptr &= rdev->ih.ptr_mask;
8252 		WREG32(IH_RB_RPTR, rptr);
8253 	}
8254 	if (queue_hotplug)
8255 		schedule_work(&rdev->hotplug_work);
8256 	if (queue_reset) {
8257 		rdev->needs_reset = true;
8258 		wake_up_all(&rdev->fence_queue);
8259 	}
8260 	if (queue_thermal)
8261 		schedule_work(&rdev->pm.dpm.thermal.work);
8262 	rdev->ih.rptr = rptr;
8263 	atomic_set(&rdev->ih.lock, 0);
8264 
8265 	/* make sure wptr hasn't changed while processing */
8266 	wptr = cik_get_ih_wptr(rdev);
8267 	if (wptr != rptr)
8268 		goto restart_ih;
8269 
8270 	return IRQ_HANDLED;
8271 }
8272 
8273 /*
8274  * startup/shutdown callbacks
8275  */
8276 /**
8277  * cik_startup - program the asic to a functional state
8278  *
8279  * @rdev: radeon_device pointer
8280  *
8281  * Programs the asic to a functional state (CIK).
8282  * Called by cik_init() and cik_resume().
8283  * Returns 0 for success, error for failure.
8284  */
8285 static int cik_startup(struct radeon_device *rdev)
8286 {
8287 	struct radeon_ring *ring;
8288 	u32 nop;
8289 	int r;
8290 
8291 	/* enable pcie gen2/3 link */
8292 	cik_pcie_gen3_enable(rdev);
8293 	/* enable aspm */
8294 	cik_program_aspm(rdev);
8295 
8296 	/* scratch needs to be initialized before MC */
8297 	r = r600_vram_scratch_init(rdev);
8298 	if (r)
8299 		return r;
8300 
8301 	cik_mc_program(rdev);
8302 
8303 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8304 		r = ci_mc_load_microcode(rdev);
8305 		if (r) {
8306 			DRM_ERROR("Failed to load MC firmware!\n");
8307 			return r;
8308 		}
8309 	}
8310 
8311 	r = cik_pcie_gart_enable(rdev);
8312 	if (r)
8313 		return r;
8314 	cik_gpu_init(rdev);
8315 
8316 	/* allocate rlc buffers */
8317 	if (rdev->flags & RADEON_IS_IGP) {
8318 		if (rdev->family == CHIP_KAVERI) {
8319 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8320 			rdev->rlc.reg_list_size =
8321 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8322 		} else {
8323 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8324 			rdev->rlc.reg_list_size =
8325 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8326 		}
8327 	}
8328 	rdev->rlc.cs_data = ci_cs_data;
8329 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8330 	r = sumo_rlc_init(rdev);
8331 	if (r) {
8332 		DRM_ERROR("Failed to init rlc BOs!\n");
8333 		return r;
8334 	}
8335 
8336 	/* allocate wb buffer */
8337 	r = radeon_wb_init(rdev);
8338 	if (r)
8339 		return r;
8340 
8341 	/* allocate mec buffers */
8342 	r = cik_mec_init(rdev);
8343 	if (r) {
8344 		DRM_ERROR("Failed to init MEC BOs!\n");
8345 		return r;
8346 	}
8347 
8348 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8349 	if (r) {
8350 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8351 		return r;
8352 	}
8353 
8354 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8355 	if (r) {
8356 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8357 		return r;
8358 	}
8359 
8360 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8361 	if (r) {
8362 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8363 		return r;
8364 	}
8365 
8366 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8367 	if (r) {
8368 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8369 		return r;
8370 	}
8371 
8372 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8373 	if (r) {
8374 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8375 		return r;
8376 	}
8377 
8378 	r = radeon_uvd_resume(rdev);
8379 	if (!r) {
8380 		r = uvd_v4_2_resume(rdev);
8381 		if (!r) {
8382 			r = radeon_fence_driver_start_ring(rdev,
8383 							   R600_RING_TYPE_UVD_INDEX);
8384 			if (r)
8385 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8386 		}
8387 	}
8388 	if (r)
8389 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8390 
8391 	r = radeon_vce_resume(rdev);
8392 	if (!r) {
8393 		r = vce_v2_0_resume(rdev);
8394 		if (!r)
8395 			r = radeon_fence_driver_start_ring(rdev,
8396 							   TN_RING_TYPE_VCE1_INDEX);
8397 		if (!r)
8398 			r = radeon_fence_driver_start_ring(rdev,
8399 							   TN_RING_TYPE_VCE2_INDEX);
8400 	}
8401 	if (r) {
8402 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8403 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8404 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8405 	}
8406 
8407 	/* Enable IRQ */
8408 	if (!rdev->irq.installed) {
8409 		r = radeon_irq_kms_init(rdev);
8410 		if (r)
8411 			return r;
8412 	}
8413 
8414 	r = cik_irq_init(rdev);
8415 	if (r) {
8416 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8417 		radeon_irq_kms_fini(rdev);
8418 		return r;
8419 	}
8420 	cik_irq_set(rdev);
8421 
8422 	if (rdev->family == CHIP_HAWAII) {
8423 		if (rdev->new_fw)
8424 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8425 		else
8426 			nop = RADEON_CP_PACKET2;
8427 	} else {
8428 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8429 	}
8430 
8431 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8432 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8433 			     nop);
8434 	if (r)
8435 		return r;
8436 
8437 	/* set up the compute queues */
8438 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8439 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8440 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8441 			     nop);
8442 	if (r)
8443 		return r;
8444 	ring->me = 1; /* first MEC */
8445 	ring->pipe = 0; /* first pipe */
8446 	ring->queue = 0; /* first queue */
8447 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8448 
8449 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8450 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8451 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8452 			     nop);
8453 	if (r)
8454 		return r;
8455 	/* dGPUs only have 1 MEC */
8456 	ring->me = 1; /* first MEC */
8457 	ring->pipe = 0; /* first pipe */
8458 	ring->queue = 1; /* second queue */
8459 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8460 
8461 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8462 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8463 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8464 	if (r)
8465 		return r;
8466 
8467 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8468 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8469 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8470 	if (r)
8471 		return r;
8472 
8473 	r = cik_cp_resume(rdev);
8474 	if (r)
8475 		return r;
8476 
8477 	r = cik_sdma_resume(rdev);
8478 	if (r)
8479 		return r;
8480 
8481 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8482 	if (ring->ring_size) {
8483 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8484 				     RADEON_CP_PACKET2);
8485 		if (!r)
8486 			r = uvd_v1_0_init(rdev);
8487 		if (r)
8488 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8489 	}
8490 
8491 	r = -ENOENT;
8492 
8493 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8494 	if (ring->ring_size)
8495 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8496 				     VCE_CMD_NO_OP);
8497 
8498 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8499 	if (ring->ring_size)
8500 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8501 				     VCE_CMD_NO_OP);
8502 
8503 	if (!r)
8504 		r = vce_v1_0_init(rdev);
8505 	else if (r != -ENOENT)
8506 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8507 
8508 	r = radeon_ib_pool_init(rdev);
8509 	if (r) {
8510 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8511 		return r;
8512 	}
8513 
8514 	r = radeon_vm_manager_init(rdev);
8515 	if (r) {
8516 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8517 		return r;
8518 	}
8519 
8520 	r = dce6_audio_init(rdev);
8521 	if (r)
8522 		return r;
8523 
8524 	r = radeon_kfd_resume(rdev);
8525 	if (r)
8526 		return r;
8527 
8528 	return 0;
8529 }
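
/*
 * Illustrative sketch (editor addition, not part of the driver): the
 * nop value chosen above is what the ring code later uses to pad
 * submissions out to the ring's alignment.  RADEON_CP_PACKET2 is a
 * plain type-2 filler dword, while PACKET3(PACKET3_NOP, 0x3FFF) appears
 * to use the special 0x3FFF count so that each filler dword is a
 * complete, self-contained type-3 NOP header; that matters because
 * type-2 packets are deprecated on the MEC and on newer Hawaii
 * microcode.  Assuming the ring stores the chosen value in ring->nop,
 * padding reduces to:
 */
#if 0
static void example_pad_ring(struct radeon_ring *ring, unsigned ndw)
{
	unsigned i;

	/* ring->nop holds the value picked in cik_startup() */
	for (i = 0; i < ndw; i++)
		radeon_ring_write(ring, ring->nop);
}
#endif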
8530 
8531 /**
8532  * cik_resume - resume the asic to a functional state
8533  *
8534  * @rdev: radeon_device pointer
8535  *
8536  * Programs the asic to a functional state (CIK).
8537  * Called at resume.
8538  * Returns 0 for success, error for failure.
8539  */
8540 int cik_resume(struct radeon_device *rdev)
8541 {
8542 	int r;
8543 
8544 	/* post card */
8545 	atom_asic_init(rdev->mode_info.atom_context);
8546 
8547 	/* init golden registers */
8548 	cik_init_golden_registers(rdev);
8549 
8550 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8551 		radeon_pm_resume(rdev);
8552 
8553 	rdev->accel_working = true;
8554 	r = cik_startup(rdev);
8555 	if (r) {
8556 		DRM_ERROR("cik startup failed on resume\n");
8557 		rdev->accel_working = false;
8558 		return r;
8559 	}
8560 
8561 	return r;
8563 }
8564 
8565 /**
8566  * cik_suspend - suspend the asic
8567  *
8568  * @rdev: radeon_device pointer
8569  *
8570  * Bring the chip into a state suitable for suspend (CIK).
8571  * Called at suspend.
8572  * Returns 0 for success.
8573  */
8574 int cik_suspend(struct radeon_device *rdev)
8575 {
8576 	radeon_kfd_suspend(rdev);
8577 	radeon_pm_suspend(rdev);
8578 	dce6_audio_fini(rdev);
8579 	radeon_vm_manager_fini(rdev);
8580 	cik_cp_enable(rdev, false);
8581 	cik_sdma_enable(rdev, false);
8582 	uvd_v1_0_fini(rdev);
8583 	radeon_uvd_suspend(rdev);
8584 	radeon_vce_suspend(rdev);
8585 	cik_fini_pg(rdev);
8586 	cik_fini_cg(rdev);
8587 	cik_irq_suspend(rdev);
8588 	radeon_wb_disable(rdev);
8589 	cik_pcie_gart_disable(rdev);
8590 	return 0;
8591 }
8592 
8593 /* The plan is to move initialization into this function and use
8594  * helper functions so that radeon_device_init does pretty much
8595  * nothing more than call asic specific functions. This should
8596  * also allow us to remove a bunch of callback functions, like
8597  * vram_info.
8598  */
8599 /**
8600  * cik_init - asic specific driver and hw init
8601  *
8602  * @rdev: radeon_device pointer
8603  *
8604  * Setup asic specific driver variables and program the hw
8605  * to a functional state (CIK).
8606  * Called at driver startup.
8607  * Returns 0 for success, errors for failure.
8608  */
8609 int cik_init(struct radeon_device *rdev)
8610 {
8611 	struct radeon_ring *ring;
8612 	int r;
8613 
8614 	/* Read BIOS */
8615 	if (!radeon_get_bios(rdev)) {
8616 		if (ASIC_IS_AVIVO(rdev))
8617 			return -EINVAL;
8618 	}
8619 	/* Must be an ATOMBIOS */
8620 	if (!rdev->is_atom_bios) {
8621 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8622 		return -EINVAL;
8623 	}
8624 	r = radeon_atombios_init(rdev);
8625 	if (r)
8626 		return r;
8627 
8628 	/* Post card if necessary */
8629 	if (!radeon_card_posted(rdev)) {
8630 		if (!rdev->bios) {
8631 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8632 			return -EINVAL;
8633 		}
8634 		DRM_INFO("GPU not posted. posting now...\n");
8635 		atom_asic_init(rdev->mode_info.atom_context);
8636 	}
8637 	/* init golden registers */
8638 	cik_init_golden_registers(rdev);
8639 	/* Initialize scratch registers */
8640 	cik_scratch_init(rdev);
8641 	/* Initialize surface registers */
8642 	radeon_surface_init(rdev);
8643 	/* Initialize clocks */
8644 	radeon_get_clock_info(rdev->ddev);
8645 
8646 	/* Fence driver */
8647 	r = radeon_fence_driver_init(rdev);
8648 	if (r)
8649 		return r;
8650 
8651 	/* initialize memory controller */
8652 	r = cik_mc_init(rdev);
8653 	if (r)
8654 		return r;
8655 	/* Memory manager */
8656 	r = radeon_bo_init(rdev);
8657 	if (r)
8658 		return r;
8659 
8660 	if (rdev->flags & RADEON_IS_IGP) {
8661 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8662 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8663 			r = cik_init_microcode(rdev);
8664 			if (r) {
8665 				DRM_ERROR("Failed to load firmware!\n");
8666 				return r;
8667 			}
8668 		}
8669 	} else {
8670 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8671 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8672 		    !rdev->mc_fw) {
8673 			r = cik_init_microcode(rdev);
8674 			if (r) {
8675 				DRM_ERROR("Failed to load firmware!\n");
8676 				return r;
8677 			}
8678 		}
8679 	}
8680 
8681 	/* Initialize power management */
8682 	radeon_pm_init(rdev);
8683 
8684 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8685 	ring->ring_obj = NULL;
8686 	r600_ring_init(rdev, ring, 1024 * 1024);
8687 
8688 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8689 	ring->ring_obj = NULL;
8690 	r600_ring_init(rdev, ring, 1024 * 1024);
8691 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8692 	if (r)
8693 		return r;
8694 
8695 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8696 	ring->ring_obj = NULL;
8697 	r600_ring_init(rdev, ring, 1024 * 1024);
8698 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8699 	if (r)
8700 		return r;
8701 
8702 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8703 	ring->ring_obj = NULL;
8704 	r600_ring_init(rdev, ring, 256 * 1024);
8705 
8706 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8707 	ring->ring_obj = NULL;
8708 	r600_ring_init(rdev, ring, 256 * 1024);
8709 
8710 	r = radeon_uvd_init(rdev);
8711 	if (!r) {
8712 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8713 		ring->ring_obj = NULL;
8714 		r600_ring_init(rdev, ring, 4096);
8715 	}
8716 
8717 	r = radeon_vce_init(rdev);
8718 	if (!r) {
8719 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8720 		ring->ring_obj = NULL;
8721 		r600_ring_init(rdev, ring, 4096);
8722 
8723 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8724 		ring->ring_obj = NULL;
8725 		r600_ring_init(rdev, ring, 4096);
8726 	}
8727 
8728 	rdev->ih.ring_obj = NULL;
8729 	r600_ih_ring_init(rdev, 64 * 1024);
8730 
8731 	r = r600_pcie_gart_init(rdev);
8732 	if (r)
8733 		return r;
8734 
8735 	rdev->accel_working = true;
8736 	r = cik_startup(rdev);
8737 	if (r) {
8738 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8739 		cik_cp_fini(rdev);
8740 		cik_sdma_fini(rdev);
8741 		cik_irq_fini(rdev);
8742 		sumo_rlc_fini(rdev);
8743 		cik_mec_fini(rdev);
8744 		radeon_wb_fini(rdev);
8745 		radeon_ib_pool_fini(rdev);
8746 		radeon_vm_manager_fini(rdev);
8747 		radeon_irq_kms_fini(rdev);
8748 		cik_pcie_gart_fini(rdev);
8749 		rdev->accel_working = false;
8750 	}
8751 
8752 	/* Don't start up if the MC ucode is missing.
8753 	 * The default clocks and voltages before the MC ucode
8754 	 * is loaded are not sufficient for advanced operations.
8755 	 */
8756 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8757 		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8758 		return -EINVAL;
8759 	}
8760 
8761 	return 0;
8762 }
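
/*
 * Illustrative sketch (editor addition): when cik_startup() fails,
 * cik_init() above tears everything down inline, in the reverse order
 * of creation.  The equivalent goto-based unwind idiom, shown here
 * with hypothetical helpers, expresses the same invariant:
 */
#if 0
static int example_init(struct radeon_device *rdev)
{
	int r;

	r = example_alloc_a(rdev);	/* hypothetical */
	if (r)
		return r;
	r = example_alloc_b(rdev);	/* hypothetical */
	if (r)
		goto err_free_a;	/* unwind in reverse order */
	return 0;

err_free_a:
	example_free_a(rdev);
	return r;
}
#endif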
8763 
8764 /**
8765  * cik_fini - asic specific driver and hw fini
8766  *
8767  * @rdev: radeon_device pointer
8768  *
8769  * Tear down the asic specific driver variables and program the hw
8770  * to an idle state (CIK).
8771  * Called at driver unload.
8772  */
8773 void cik_fini(struct radeon_device *rdev)
8774 {
8775 	radeon_pm_fini(rdev);
8776 	cik_cp_fini(rdev);
8777 	cik_sdma_fini(rdev);
8778 	cik_fini_pg(rdev);
8779 	cik_fini_cg(rdev);
8780 	cik_irq_fini(rdev);
8781 	sumo_rlc_fini(rdev);
8782 	cik_mec_fini(rdev);
8783 	radeon_wb_fini(rdev);
8784 	radeon_vm_manager_fini(rdev);
8785 	radeon_ib_pool_fini(rdev);
8786 	radeon_irq_kms_fini(rdev);
8787 	uvd_v1_0_fini(rdev);
8788 	radeon_uvd_fini(rdev);
8789 	radeon_vce_fini(rdev);
8790 	cik_pcie_gart_fini(rdev);
8791 	r600_vram_scratch_fini(rdev);
8792 	radeon_gem_fini(rdev);
8793 	radeon_fence_driver_fini(rdev);
8794 	radeon_bo_fini(rdev);
8795 	radeon_atombios_fini(rdev);
8796 	kfree(rdev->bios);
8797 	rdev->bios = NULL;
8798 }
8799 
8800 void dce8_program_fmt(struct drm_encoder *encoder)
8801 {
8802 	struct drm_device *dev = encoder->dev;
8803 	struct radeon_device *rdev = dev->dev_private;
8804 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8805 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8806 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8807 	int bpc = 0;
8808 	u32 tmp = 0;
8809 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8810 
8811 	if (connector) {
8812 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8813 		bpc = radeon_get_monitor_bpc(connector);
8814 		dither = radeon_connector->dither;
8815 	}
8816 
8817 	/* LVDS/eDP FMT is set up by atom */
8818 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8819 		return;
8820 
8821 	/* not needed for analog */
8822 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8823 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8824 		return;
8825 
8826 	if (bpc == 0)
8827 		return;
8828 
8829 	switch (bpc) {
8830 	case 6:
8831 		if (dither == RADEON_FMT_DITHER_ENABLE)
8832 			/* XXX sort out optimal dither settings */
8833 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8834 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8835 		else
8836 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8837 		break;
8838 	case 8:
8839 		if (dither == RADEON_FMT_DITHER_ENABLE)
8840 			/* XXX sort out optimal dither settings */
8841 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8842 				FMT_RGB_RANDOM_ENABLE |
8843 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8844 		else
8845 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8846 		break;
8847 	case 10:
8848 		if (dither == RADEON_FMT_DITHER_ENABLE)
8849 			/* XXX sort out optimal dither settings */
8850 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8851 				FMT_RGB_RANDOM_ENABLE |
8852 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8853 		else
8854 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8855 		break;
8856 	default:
8857 		/* not needed */
8858 		break;
8859 	}
8860 
8861 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8862 }
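
/*
 * Illustrative sketch (editor addition): the switch above encodes a
 * simple relationship between monitor depth and the FMT depth field
 * (6 bpc -> 0, 8 bpc -> 1, 10 bpc -> 2):
 */
#if 0
static u32 example_fmt_depth(int bpc)
{
	/* value fed to FMT_SPATIAL_DITHER_DEPTH()/FMT_TRUNCATE_DEPTH() */
	return (bpc - 6) / 2;
}
#endif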
8863 
8864 /* display watermark setup */
8865 /**
8866  * dce8_line_buffer_adjust - Set up the line buffer
8867  *
8868  * @rdev: radeon_device pointer
8869  * @radeon_crtc: the selected display controller
8870  * @mode: the current display mode on the selected display
8871  * controller
8872  *
8873  * Set up the line buffer allocation for
8874  * the selected display controller (CIK).
8875  * Returns the line buffer size in pixels.
8876  */
8877 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8878 				   struct radeon_crtc *radeon_crtc,
8879 				   struct drm_display_mode *mode)
8880 {
8881 	u32 tmp, buffer_alloc, i;
8882 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8883 	/*
8884 	 * Line Buffer Setup
8885 	 * There are 6 line buffers, one for each display controller.
8886 	 * There are 3 partitions per LB. Select the number of partitions
8887 	 * to enable based on the display width.  For display widths larger
8888 	 * than 4096, you need to use 2 display controllers and combine
8889 	 * them using the stereo blender.
8890 	 */
8891 	if (radeon_crtc->base.enabled && mode) {
8892 		if (mode->crtc_hdisplay < 1920) {
8893 			tmp = 1;
8894 			buffer_alloc = 2;
8895 		} else if (mode->crtc_hdisplay < 2560) {
8896 			tmp = 2;
8897 			buffer_alloc = 2;
8898 		} else if (mode->crtc_hdisplay < 4096) {
8899 			tmp = 0;
8900 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8901 		} else {
8902 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8903 			tmp = 0;
8904 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8905 		}
8906 	} else {
8907 		tmp = 1;
8908 		buffer_alloc = 0;
8909 	}
8910 
8911 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8912 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8913 
8914 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8915 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8916 	for (i = 0; i < rdev->usec_timeout; i++) {
8917 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8918 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8919 			break;
8920 		udelay(1);
8921 	}
8922 
8923 	if (radeon_crtc->base.enabled && mode) {
8924 		switch (tmp) {
8925 		case 0:
8926 		default:
8927 			return 4096 * 2;
8928 		case 1:
8929 			return 1920 * 2;
8930 		case 2:
8931 			return 2560 * 2;
8932 		}
8933 	}
8934 
8935 	/* controller not enabled, so no lb used */
8936 	return 0;
8937 }
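
/*
 * Illustrative sketch (editor addition): for an enabled crtc the
 * function above reduces to this width -> line-buffer-size mapping;
 * a 1920-wide mode, for instance, lands in config 2 and gets
 * 2560 * 2 = 5120 pixels of LB:
 */
#if 0
static u32 example_lb_pixels(u32 crtc_hdisplay)
{
	if (crtc_hdisplay < 1920)
		return 1920 * 2;	/* config 1 */
	else if (crtc_hdisplay < 2560)
		return 2560 * 2;	/* config 2 */
	else
		return 4096 * 2;	/* config 0 */
}
#endif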
8938 
8939 /**
8940  * cik_get_number_of_dram_channels - get the number of dram channels
8941  *
8942  * @rdev: radeon_device pointer
8943  *
8944  * Look up the number of video ram channels (CIK).
8945  * Used for display watermark bandwidth calculations
8946  * Returns the number of dram channels
8947  */
8948 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8949 {
8950 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8951 
8952 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8953 	case 0:
8954 	default:
8955 		return 1;
8956 	case 1:
8957 		return 2;
8958 	case 2:
8959 		return 4;
8960 	case 3:
8961 		return 8;
8962 	case 4:
8963 		return 3;
8964 	case 5:
8965 		return 6;
8966 	case 6:
8967 		return 10;
8968 	case 7:
8969 		return 12;
8970 	case 8:
8971 		return 16;
8972 	}
8973 }
8974 
8975 struct dce8_wm_params {
8976 	u32 dram_channels; /* number of dram channels */
8977 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8978 	u32 sclk;          /* engine clock in kHz */
8979 	u32 disp_clk;      /* display clock in kHz */
8980 	u32 src_width;     /* viewport width */
8981 	u32 active_time;   /* active display time in ns */
8982 	u32 blank_time;    /* blank time in ns */
8983 	bool interlaced;   /* mode is interlaced */
8984 	fixed20_12 vsc;    /* vertical scale ratio */
8985 	u32 num_heads;     /* number of active crtcs */
8986 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8987 	u32 lb_size;       /* line buffer allocated to pipe */
8988 	u32 vtaps;         /* vertical scaler taps */
8989 };
8990 
8991 /**
8992  * dce8_dram_bandwidth - get the dram bandwidth
8993  *
8994  * @wm: watermark calculation data
8995  *
8996  * Calculate the raw dram bandwidth (CIK).
8997  * Used for display watermark bandwidth calculations
8998  * Returns the dram bandwidth in MBytes/s
8999  */
9000 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9001 {
9002 	/* Calculate raw DRAM Bandwidth */
9003 	fixed20_12 dram_efficiency; /* 0.7 */
9004 	fixed20_12 yclk, dram_channels, bandwidth;
9005 	fixed20_12 a;
9006 
9007 	a.full = dfixed_const(1000);
9008 	yclk.full = dfixed_const(wm->yclk);
9009 	yclk.full = dfixed_div(yclk, a);
9010 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9011 	a.full = dfixed_const(10);
9012 	dram_efficiency.full = dfixed_const(7);
9013 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9014 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9015 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9016 
9017 	return dfixed_trunc(bandwidth);
9018 }
9019 
9020 /**
9021  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9022  *
9023  * @wm: watermark calculation data
9024  *
9025  * Calculate the dram bandwidth used for display (CIK).
9026  * Used for display watermark bandwidth calculations
9027  * Returns the dram bandwidth for display in MBytes/s
9028  */
9029 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9030 {
9031 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9032 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9033 	fixed20_12 yclk, dram_channels, bandwidth;
9034 	fixed20_12 a;
9035 
9036 	a.full = dfixed_const(1000);
9037 	yclk.full = dfixed_const(wm->yclk);
9038 	yclk.full = dfixed_div(yclk, a);
9039 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9040 	a.full = dfixed_const(10);
9041 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9042 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9043 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9044 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9045 
9046 	return dfixed_trunc(bandwidth);
9047 }
9048 
9049 /**
9050  * dce8_data_return_bandwidth - get the data return bandwidth
9051  *
9052  * @wm: watermark calculation data
9053  *
9054  * Calculate the data return bandwidth used for display (CIK).
9055  * Used for display watermark bandwidth calculations
9056  * Returns the data return bandwidth in MBytes/s
9057  */
9058 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9059 {
9060 	/* Calculate the display Data return Bandwidth */
9061 	fixed20_12 return_efficiency; /* 0.8 */
9062 	fixed20_12 sclk, bandwidth;
9063 	fixed20_12 a;
9064 
9065 	a.full = dfixed_const(1000);
9066 	sclk.full = dfixed_const(wm->sclk);
9067 	sclk.full = dfixed_div(sclk, a);
9068 	a.full = dfixed_const(10);
9069 	return_efficiency.full = dfixed_const(8);
9070 	return_efficiency.full = dfixed_div(return_efficiency, a);
9071 	a.full = dfixed_const(32);
9072 	bandwidth.full = dfixed_mul(a, sclk);
9073 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9074 
9075 	return dfixed_trunc(bandwidth);
9076 }
9077 
9078 /**
9079  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9080  *
9081  * @wm: watermark calculation data
9082  *
9083  * Calculate the dmif bandwidth used for display (CIK).
9084  * Used for display watermark bandwidth calculations
9085  * Returns the dmif bandwidth in MBytes/s
9086  */
9087 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9088 {
9089 	/* Calculate the DMIF Request Bandwidth */
9090 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9091 	fixed20_12 disp_clk, bandwidth;
9092 	fixed20_12 a, b;
9093 
9094 	a.full = dfixed_const(1000);
9095 	disp_clk.full = dfixed_const(wm->disp_clk);
9096 	disp_clk.full = dfixed_div(disp_clk, a);
9097 	a.full = dfixed_const(32);
9098 	b.full = dfixed_mul(a, disp_clk);
9099 
9100 	a.full = dfixed_const(10);
9101 	disp_clk_request_efficiency.full = dfixed_const(8);
9102 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9103 
9104 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9105 
9106 	return dfixed_trunc(bandwidth);
9107 }
9108 
9109 /**
9110  * dce8_available_bandwidth - get the min available bandwidth
9111  *
9112  * @wm: watermark calculation data
9113  *
9114  * Calculate the min available bandwidth used for display (CIK).
9115  * Used for display watermark bandwidth calculations
9116  * Returns the min available bandwidth in MBytes/s
9117  */
9118 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9119 {
9120 	/* Calculate the available bandwidth. Display can use this temporarily but not on average. */
9121 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9122 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9123 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9124 
9125 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9126 }
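
/*
 * Illustrative sketch (editor addition): stripped of the 20.12
 * fixed-point plumbing, the three limits above are, in MB/s:
 * DRAM = channels * 4 bytes * yclk(MHz) * 0.7 efficiency,
 * data return = 32 bytes * sclk(MHz) * 0.8, and
 * DMIF = 32 bytes * dispclk(MHz) * 0.8.
 */
#if 0
static u32 example_available_bandwidth_mb(u32 yclk_khz, u32 channels,
					  u32 sclk_khz, u32 disp_clk_khz)
{
	u32 dram = channels * 4 * (yclk_khz / 1000) * 7 / 10;
	u32 dret = 32 * (sclk_khz / 1000) * 8 / 10;
	u32 dmif = 32 * (disp_clk_khz / 1000) * 8 / 10;

	return min(dram, min(dret, dmif));
}
#endif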
9127 
9128 /**
9129  * dce8_average_bandwidth - get the average available bandwidth
9130  *
9131  * @wm: watermark calculation data
9132  *
9133  * Calculate the average available bandwidth used for display (CIK).
9134  * Used for display watermark bandwidth calculations
9135  * Returns the average available bandwidth in MBytes/s
9136  */
9137 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9138 {
9139 	/* Calculate the display mode Average Bandwidth
9140 	 * DisplayMode should contain the source and destination dimensions,
9141 	 * timing, etc.
9142 	 */
9143 	fixed20_12 bpp;
9144 	fixed20_12 line_time;
9145 	fixed20_12 src_width;
9146 	fixed20_12 bandwidth;
9147 	fixed20_12 a;
9148 
9149 	a.full = dfixed_const(1000);
9150 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9151 	line_time.full = dfixed_div(line_time, a);
9152 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9153 	src_width.full = dfixed_const(wm->src_width);
9154 	bandwidth.full = dfixed_mul(src_width, bpp);
9155 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9156 	bandwidth.full = dfixed_div(bandwidth, line_time);
9157 
9158 	return dfixed_trunc(bandwidth);
9159 }
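
/*
 * Illustrative sketch (editor addition): with vsc == 1 the math above
 * is just bytes-per-scanline divided by the line period.  For a 1080p
 * mode (1920 px wide, 4 Bpp, ~14.8 us line time) that comes to roughly
 * 1920 * 4 / 14.8 ~= 519 MB/s.
 */
#if 0
static u32 example_average_bandwidth_mb(u32 src_width, u32 bytes_per_pixel,
					u32 line_time_ns)
{
	/* bytes fetched per line / line period in us = MB/s */
	return src_width * bytes_per_pixel * 1000 / line_time_ns;
}
#endif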
9160 
9161 /**
9162  * dce8_latency_watermark - get the latency watermark
9163  *
9164  * @wm: watermark calculation data
9165  *
9166  * Calculate the latency watermark (CIK).
9167  * Used for display watermark bandwidth calculations
9168  * Returns the latency watermark in ns
9169  */
9170 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9171 {
9172 	/* First calculate the latency in ns */
9173 	u32 mc_latency = 2000; /* 2000 ns. */
9174 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9175 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9176 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9177 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9178 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9179 		(wm->num_heads * cursor_line_pair_return_time);
9180 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9181 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9182 	u32 tmp, dmif_size = 12288;
9183 	fixed20_12 a, b, c;
9184 
9185 	if (wm->num_heads == 0)
9186 		return 0;
9187 
9188 	a.full = dfixed_const(2);
9189 	b.full = dfixed_const(1);
9190 	if ((wm->vsc.full > a.full) ||
9191 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9192 	    (wm->vtaps >= 5) ||
9193 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9194 		max_src_lines_per_dst_line = 4;
9195 	else
9196 		max_src_lines_per_dst_line = 2;
9197 
9198 	a.full = dfixed_const(available_bandwidth);
9199 	b.full = dfixed_const(wm->num_heads);
9200 	a.full = dfixed_div(a, b);
9201 
9202 	b.full = dfixed_const(mc_latency + 512);
9203 	c.full = dfixed_const(wm->disp_clk);
9204 	b.full = dfixed_div(b, c);
9205 
9206 	c.full = dfixed_const(dmif_size);
9207 	b.full = dfixed_div(c, b);
9208 
9209 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9210 
9211 	b.full = dfixed_const(1000);
9212 	c.full = dfixed_const(wm->disp_clk);
9213 	b.full = dfixed_div(c, b);
9214 	c.full = dfixed_const(wm->bytes_per_pixel);
9215 	b.full = dfixed_mul(b, c);
9216 
9217 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9218 
9219 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9220 	b.full = dfixed_const(1000);
9221 	c.full = dfixed_const(lb_fill_bw);
9222 	b.full = dfixed_div(c, b);
9223 	a.full = dfixed_div(a, b);
9224 	line_fill_time = dfixed_trunc(a);
9225 
9226 	if (line_fill_time < wm->active_time)
9227 		return latency;
9228 	else
9229 		return latency + (line_fill_time - wm->active_time);
9230 
9231 }
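
/*
 * Illustrative sketch (editor addition): the latency term computed at
 * the top of dce8_latency_watermark() boils down to plain integer
 * math in ns (the line-fill correction below it then extends this if
 * the line buffer cannot be filled within the active period):
 */
#if 0
static u32 example_latency_ns(u32 available_bw_mb, u32 disp_clk_khz,
			      u32 num_heads)
{
	u32 chunk = (512 * 8 * 1000) / available_bw_mb;  /* worst chunk return */
	u32 cursor = (128 * 4 * 1000) / available_bw_mb; /* cursor line pair */
	u32 dc = 40000000 / disp_clk_khz;                /* dc pipe latency */

	return 2000 /* mc latency */ +
	       (num_heads + 1) * chunk + num_heads * cursor + dc;
}
#endif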
9232 
9233 /**
9234  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9235  * average and available dram bandwidth
9236  *
9237  * @wm: watermark calculation data
9238  *
9239  * Check if the display average bandwidth fits in the display
9240  * dram bandwidth (CIK).
9241  * Used for display watermark bandwidth calculations
9242  * Returns true if the display fits, false if not.
9243  */
9244 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9245 {
9246 	if (dce8_average_bandwidth(wm) <=
9247 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9248 		return true;
9249 	else
9250 		return false;
9251 }
9252 
9253 /**
9254  * dce8_average_bandwidth_vs_available_bandwidth - check
9255  * average and available bandwidth
9256  *
9257  * @wm: watermark calculation data
9258  *
9259  * Check if the display average bandwidth fits in the display
9260  * available bandwidth (CIK).
9261  * Used for display watermark bandwidth calculations
9262  * Returns true if the display fits, false if not.
9263  */
9264 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9265 {
9266 	if (dce8_average_bandwidth(wm) <=
9267 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9268 		return true;
9269 	else
9270 		return false;
9271 }
9272 
9273 /**
9274  * dce8_check_latency_hiding - check latency hiding
9275  *
9276  * @wm: watermark calculation data
9277  *
9278  * Check latency hiding (CIK).
9279  * Used for display watermark bandwidth calculations
9280  * Returns true if the display fits, false if not.
9281  */
9282 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9283 {
9284 	u32 lb_partitions = wm->lb_size / wm->src_width;
9285 	u32 line_time = wm->active_time + wm->blank_time;
9286 	u32 latency_tolerant_lines;
9287 	u32 latency_hiding;
9288 	fixed20_12 a;
9289 
9290 	a.full = dfixed_const(1);
9291 	if (wm->vsc.full > a.full)
9292 		latency_tolerant_lines = 1;
9293 	else {
9294 		if (lb_partitions <= (wm->vtaps + 1))
9295 			latency_tolerant_lines = 1;
9296 		else
9297 			latency_tolerant_lines = 2;
9298 	}
9299 
9300 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9301 
9302 	if (dce8_latency_watermark(wm) <= latency_hiding)
9303 		return true;
9304 	else
9305 		return false;
9306 }
9307 
9308 /**
9309  * dce8_program_watermarks - program display watermarks
9310  *
9311  * @rdev: radeon_device pointer
9312  * @radeon_crtc: the selected display controller
9313  * @lb_size: line buffer size
9314  * @num_heads: number of display controllers in use
9315  *
9316  * Calculate and program the display watermarks for the
9317  * selected display controller (CIK).
9318  */
9319 static void dce8_program_watermarks(struct radeon_device *rdev,
9320 				    struct radeon_crtc *radeon_crtc,
9321 				    u32 lb_size, u32 num_heads)
9322 {
9323 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9324 	struct dce8_wm_params wm_low, wm_high;
9325 	u32 pixel_period;
9326 	u32 line_time = 0;
9327 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9328 	u32 tmp, wm_mask;
9329 
9330 	if (radeon_crtc->base.enabled && num_heads && mode) {
9331 		pixel_period = 1000000 / (u32)mode->clock;
9332 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9333 
9334 		/* watermark for high clocks */
9335 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9336 		    rdev->pm.dpm_enabled) {
9337 			wm_high.yclk =
9338 				radeon_dpm_get_mclk(rdev, false) * 10;
9339 			wm_high.sclk =
9340 				radeon_dpm_get_sclk(rdev, false) * 10;
9341 		} else {
9342 			wm_high.yclk = rdev->pm.current_mclk * 10;
9343 			wm_high.sclk = rdev->pm.current_sclk * 10;
9344 		}
9345 
9346 		wm_high.disp_clk = mode->clock;
9347 		wm_high.src_width = mode->crtc_hdisplay;
9348 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9349 		wm_high.blank_time = line_time - wm_high.active_time;
9350 		wm_high.interlaced = false;
9351 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9352 			wm_high.interlaced = true;
9353 		wm_high.vsc = radeon_crtc->vsc;
9354 		wm_high.vtaps = 1;
9355 		if (radeon_crtc->rmx_type != RMX_OFF)
9356 			wm_high.vtaps = 2;
9357 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9358 		wm_high.lb_size = lb_size;
9359 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9360 		wm_high.num_heads = num_heads;
9361 
9362 		/* set for high clocks */
9363 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9364 
9365 		/* possibly force display priority to high */
9366 		/* should really do this at mode validation time... */
9367 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9368 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9369 		    !dce8_check_latency_hiding(&wm_high) ||
9370 		    (rdev->disp_priority == 2)) {
9371 			DRM_DEBUG_KMS("force priority to high\n");
9372 		}
9373 
9374 		/* watermark for low clocks */
9375 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9376 		    rdev->pm.dpm_enabled) {
9377 			wm_low.yclk =
9378 				radeon_dpm_get_mclk(rdev, true) * 10;
9379 			wm_low.sclk =
9380 				radeon_dpm_get_sclk(rdev, true) * 10;
9381 		} else {
9382 			wm_low.yclk = rdev->pm.current_mclk * 10;
9383 			wm_low.sclk = rdev->pm.current_sclk * 10;
9384 		}
9385 
9386 		wm_low.disp_clk = mode->clock;
9387 		wm_low.src_width = mode->crtc_hdisplay;
9388 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9389 		wm_low.blank_time = line_time - wm_low.active_time;
9390 		wm_low.interlaced = false;
9391 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9392 			wm_low.interlaced = true;
9393 		wm_low.vsc = radeon_crtc->vsc;
9394 		wm_low.vtaps = 1;
9395 		if (radeon_crtc->rmx_type != RMX_OFF)
9396 			wm_low.vtaps = 2;
9397 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9398 		wm_low.lb_size = lb_size;
9399 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9400 		wm_low.num_heads = num_heads;
9401 
9402 		/* set for low clocks */
9403 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9404 
9405 		/* possibly force display priority to high */
9406 		/* should really do this at mode validation time... */
9407 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9408 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9409 		    !dce8_check_latency_hiding(&wm_low) ||
9410 		    (rdev->disp_priority == 2)) {
9411 			DRM_DEBUG_KMS("force priority to high\n");
9412 		}
9413 	}
9414 
9415 	/* select wm A */
9416 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9417 	tmp = wm_mask;
9418 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9419 	tmp |= LATENCY_WATERMARK_MASK(1);
9420 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9421 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9422 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9423 		LATENCY_HIGH_WATERMARK(line_time)));
9424 	/* select wm B */
9425 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9426 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9427 	tmp |= LATENCY_WATERMARK_MASK(2);
9428 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9429 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9430 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9431 		LATENCY_HIGH_WATERMARK(line_time)));
9432 	/* restore original selection */
9433 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9434 
9435 	/* save values for DPM */
9436 	radeon_crtc->line_time = line_time;
9437 	radeon_crtc->wm_high = latency_watermark_a;
9438 	radeon_crtc->wm_low = latency_watermark_b;
9439 }
9440 
9441 /**
9442  * dce8_bandwidth_update - program display watermarks
9443  *
9444  * @rdev: radeon_device pointer
9445  *
9446  * Calculate and program the display watermarks and line
9447  * buffer allocation (CIK).
9448  */
9449 void dce8_bandwidth_update(struct radeon_device *rdev)
9450 {
9451 	struct drm_display_mode *mode = NULL;
9452 	u32 num_heads = 0, lb_size;
9453 	int i;
9454 
9455 	if (!rdev->mode_info.mode_config_initialized)
9456 		return;
9457 
9458 	radeon_update_display_priority(rdev);
9459 
9460 	for (i = 0; i < rdev->num_crtc; i++) {
9461 		if (rdev->mode_info.crtcs[i]->base.enabled)
9462 			num_heads++;
9463 	}
9464 	for (i = 0; i < rdev->num_crtc; i++) {
9465 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9466 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9467 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9468 	}
9469 }
9470 
9471 /**
9472  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9473  *
9474  * @rdev: radeon_device pointer
9475  *
9476  * Fetches a GPU clock counter snapshot (CIK).
9477  * Returns the 64 bit clock counter snapshot.
9478  */
9479 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9480 {
9481 	uint64_t clock;
9482 
9483 	mutex_lock(&rdev->gpu_clock_mutex);
9484 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9485 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9486 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9487 	mutex_unlock(&rdev->gpu_clock_mutex);
9488 	return clock;
9489 }
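
/*
 * Illustrative sketch (editor addition): the write to
 * RLC_CAPTURE_GPU_CLOCK_COUNT snapshots the counter so that the two
 * 32-bit reads are mutually consistent, and gpu_clock_mutex keeps
 * concurrent callers from re-capturing in between.  Composing the
 * halves is the usual:
 */
#if 0
static uint64_t example_compose64(u32 lo, u32 hi)
{
	return (uint64_t)lo | ((uint64_t)hi << 32);
}
#endif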
9490 
9491 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9492                               u32 cntl_reg, u32 status_reg)
9493 {
9494 	int r, i;
9495 	struct atom_clock_dividers dividers;
9496 	uint32_t tmp;
9497 
9498 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9499 					   clock, false, &dividers);
9500 	if (r)
9501 		return r;
9502 
9503 	tmp = RREG32_SMC(cntl_reg);
9504 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9505 	tmp |= dividers.post_divider;
9506 	WREG32_SMC(cntl_reg, tmp);
9507 
9508 	for (i = 0; i < 100; i++) {
9509 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9510 			break;
9511 		mdelay(10);
9512 	}
9513 	if (i == 100)
9514 		return -ETIMEDOUT;
9515 
9516 	return 0;
9517 }
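
/*
 * Illustrative sketch (editor addition): cik_set_uvd_clock() above and
 * cik_set_vce_clocks() below share the same bounded poll (100 tries,
 * 10 ms apart, so ~1 s worst case) on an SMC status bit.  A
 * hypothetical common helper would be:
 */
#if 0
static int example_wait_smc_status(struct radeon_device *rdev,
				   u32 status_reg, u32 status_bit)
{
	int i;

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & status_bit)
			return 0;
		mdelay(10);
	}
	return -ETIMEDOUT;
}
#endif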
9518 
9519 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9520 {
9521 	int r;
9522 
9523 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9524 	if (r)
9525 		return r;
9526 
9527 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9528 	return r;
9529 }
9530 
9531 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9532 {
9533 	int r, i;
9534 	struct atom_clock_dividers dividers;
9535 	u32 tmp;
9536 
9537 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9538 					   ecclk, false, &dividers);
9539 	if (r)
9540 		return r;
9541 
9542 	for (i = 0; i < 100; i++) {
9543 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9544 			break;
9545 		mdelay(10);
9546 	}
9547 	if (i == 100)
9548 		return -ETIMEDOUT;
9549 
9550 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9551 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9552 	tmp |= dividers.post_divider;
9553 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9554 
9555 	for (i = 0; i < 100; i++) {
9556 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9557 			break;
9558 		mdelay(10);
9559 	}
9560 	if (i == 100)
9561 		return -ETIMEDOUT;
9562 
9563 	return 0;
9564 }
9565 
9566 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9567 {
9568 	struct pci_dev *root = rdev->pdev->bus->self;
9569 	int bridge_pos, gpu_pos;
9570 	u32 speed_cntl, mask, current_data_rate;
9571 	int ret, i;
9572 	u16 tmp16;
9573 
9574 	if (pci_is_root_bus(rdev->pdev->bus))
9575 		return;
9576 
9577 	if (radeon_pcie_gen2 == 0)
9578 		return;
9579 
9580 	if (rdev->flags & RADEON_IS_IGP)
9581 		return;
9582 
9583 	if (!(rdev->flags & RADEON_IS_PCIE))
9584 		return;
9585 
9586 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9587 	if (ret != 0)
9588 		return;
9589 
9590 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9591 		return;
9592 
9593 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9594 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9595 		LC_CURRENT_DATA_RATE_SHIFT;
9596 	if (mask & DRM_PCIE_SPEED_80) {
9597 		if (current_data_rate == 2) {
9598 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9599 			return;
9600 		}
9601 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9602 	} else if (mask & DRM_PCIE_SPEED_50) {
9603 		if (current_data_rate == 1) {
9604 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9605 			return;
9606 		}
9607 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9608 	}
9609 
9610 	bridge_pos = pci_pcie_cap(root);
9611 	if (!bridge_pos)
9612 		return;
9613 
9614 	gpu_pos = pci_pcie_cap(rdev->pdev);
9615 	if (!gpu_pos)
9616 		return;
9617 
9618 	if (mask & DRM_PCIE_SPEED_80) {
9619 		/* re-try equalization if gen3 is not already enabled */
9620 		if (current_data_rate != 2) {
9621 			u16 bridge_cfg, gpu_cfg;
9622 			u16 bridge_cfg2, gpu_cfg2;
9623 			u32 max_lw, current_lw, tmp;
9624 
9625 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9626 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9627 
9628 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9629 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9630 
9631 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9632 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9633 
9634 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9635 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9636 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9637 
9638 			if (current_lw < max_lw) {
9639 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9640 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9641 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9642 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9643 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9644 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9645 				}
9646 			}
9647 
9648 			for (i = 0; i < 10; i++) {
9649 				/* check status */
9650 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9651 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9652 					break;
9653 
9654 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9655 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9656 
9657 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9658 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9659 
9660 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9661 				tmp |= LC_SET_QUIESCE;
9662 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9663 
9664 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9665 				tmp |= LC_REDO_EQ;
9666 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9667 
9668 				mdelay(100);
9669 
9670 				/* linkctl */
9671 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9672 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9673 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9674 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9675 
9676 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9677 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9678 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9679 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9680 
9681 				/* linkctl2 */
9682 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9683 				tmp16 &= ~((1 << 4) | (7 << 9));
9684 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9685 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9686 
9687 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9688 				tmp16 &= ~((1 << 4) | (7 << 9));
9689 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9690 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9691 
9692 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9693 				tmp &= ~LC_SET_QUIESCE;
9694 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9695 			}
9696 		}
9697 	}
9698 
9699 	/* set the link speed */
9700 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9701 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9702 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9703 
9704 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9705 	tmp16 &= ~0xf;
9706 	if (mask & DRM_PCIE_SPEED_80)
9707 		tmp16 |= 3; /* gen3 */
9708 	else if (mask & DRM_PCIE_SPEED_50)
9709 		tmp16 |= 2; /* gen2 */
9710 	else
9711 		tmp16 |= 1; /* gen1 */
9712 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9713 
9714 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9715 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9716 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9717 
9718 	for (i = 0; i < rdev->usec_timeout; i++) {
9719 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9720 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9721 			break;
9722 		udelay(1);
9723 	}
9724 }
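
/*
 * Illustrative sketch (editor addition): the low four bits of
 * PCI_EXP_LNKCTL2 form the target link speed field, which is why the
 * code above masks with ~0xf and then ORs in 1, 2 or 3 (2.5, 5.0 and
 * 8.0 GT/s respectively):
 */
#if 0
static u16 example_lnkctl2_target_speed(u16 lnkctl2, u32 mask)
{
	lnkctl2 &= ~0xf;		/* clear target link speed */
	if (mask & DRM_PCIE_SPEED_80)
		lnkctl2 |= 3;		/* gen3, 8.0 GT/s */
	else if (mask & DRM_PCIE_SPEED_50)
		lnkctl2 |= 2;		/* gen2, 5.0 GT/s */
	else
		lnkctl2 |= 1;		/* gen1, 2.5 GT/s */
	return lnkctl2;
}
#endif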
9725 
9726 static void cik_program_aspm(struct radeon_device *rdev)
9727 {
9728 	u32 data, orig;
9729 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9730 	bool disable_clkreq = false;
9731 
9732 	if (radeon_aspm == 0)
9733 		return;
9734 
9735 	/* XXX double check IGPs */
9736 	if (rdev->flags & RADEON_IS_IGP)
9737 		return;
9738 
9739 	if (!(rdev->flags & RADEON_IS_PCIE))
9740 		return;
9741 
9742 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9743 	data &= ~LC_XMIT_N_FTS_MASK;
9744 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9745 	if (orig != data)
9746 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9747 
9748 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9749 	data |= LC_GO_TO_RECOVERY;
9750 	if (orig != data)
9751 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9752 
9753 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9754 	data |= P_IGNORE_EDB_ERR;
9755 	if (orig != data)
9756 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9757 
9758 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9759 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9760 	data |= LC_PMI_TO_L1_DIS;
9761 	if (!disable_l0s)
9762 		data |= LC_L0S_INACTIVITY(7);
9763 
9764 	if (!disable_l1) {
9765 		data |= LC_L1_INACTIVITY(7);
9766 		data &= ~LC_PMI_TO_L1_DIS;
9767 		if (orig != data)
9768 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9769 
9770 		if (!disable_plloff_in_l1) {
9771 			bool clk_req_support;
9772 
9773 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9774 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9775 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9776 			if (orig != data)
9777 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9778 
9779 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9780 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9781 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9782 			if (orig != data)
9783 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9784 
9785 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9786 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9787 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9788 			if (orig != data)
9789 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9790 
9791 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9792 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9793 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9794 			if (orig != data)
9795 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9796 
9797 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9798 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9799 			data |= LC_DYN_LANES_PWR_STATE(3);
9800 			if (orig != data)
9801 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9802 
9803 			if (!disable_clkreq &&
9804 			    !pci_is_root_bus(rdev->pdev->bus)) {
9805 				struct pci_dev *root = rdev->pdev->bus->self;
9806 				u32 lnkcap;
9807 
9808 				clk_req_support = false;
9809 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9810 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9811 					clk_req_support = true;
9812 			} else {
9813 				clk_req_support = false;
9814 			}
9815 
9816 			if (clk_req_support) {
9817 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9818 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9819 				if (orig != data)
9820 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9821 
9822 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9823 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9824 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9825 				if (orig != data)
9826 					WREG32_SMC(THM_CLK_CNTL, data);
9827 
9828 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9829 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9830 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9831 				if (orig != data)
9832 					WREG32_SMC(MISC_CLK_CTRL, data);
9833 
9834 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9835 				data &= ~BCLK_AS_XCLK;
9836 				if (orig != data)
9837 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9838 
9839 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9840 				data &= ~FORCE_BIF_REFCLK_EN;
9841 				if (orig != data)
9842 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9843 
9844 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9845 				data &= ~MPLL_CLKOUT_SEL_MASK;
9846 				data |= MPLL_CLKOUT_SEL(4);
9847 				if (orig != data)
9848 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9849 			}
9850 		}
9851 	} else {
9852 		if (orig != data)
9853 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9854 	}
9855 
9856 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9857 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9858 	if (orig != data)
9859 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9860 
9861 	if (!disable_l0s) {
9862 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9863 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9864 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9865 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9866 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9867 				data &= ~LC_L0S_INACTIVITY_MASK;
9868 				if (orig != data)
9869 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9870 			}
9871 		}
9872 	}
9873 }
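
/*
 * Illustrative sketch (editor addition): nearly every register update
 * in cik_program_aspm() follows the same "write only if changed"
 * read-modify-write pattern, which avoids redundant (and potentially
 * slow) PCIE port writes.  Factored out, it is:
 */
#if 0
static void example_rmw_pcie_port(struct radeon_device *rdev, u32 reg,
				  u32 clear, u32 set)
{
	u32 orig, data;

	orig = data = RREG32_PCIE_PORT(reg);
	data &= ~clear;
	data |= set;
	if (orig != data)
		WREG32_PCIE_PORT(reg, data);
}
#endif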
9874