/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
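
/*
 * Editorial sketch, not part of the driver: the CTF field read above is a
 * nine-bit reading plus a flag.  When bit 0x200 is set the value appears to
 * be treated as out of range and clamped to 255 C; otherwise the low nine
 * bits are degrees Celsius, scaled to millidegrees for hwmon.  As a
 * standalone decode (ci_decode_ctf_temp is an illustrative name):
 */
#if 0	/* illustration only */
static inline int ci_decode_ctf_temp(u32 field)
{
	/* 0x200 set: sensor out of range, clamp to 255 C */
	int degrees = (field & 0x200) ? 255 : (int)(field & 0x1ff);

	return degrees * 1000;	/* hwmon reports millidegrees C */
}
#endif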

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
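
/*
 * Editorial sketch, not from the driver: the two helpers above implement
 * the classic index/data indirect-access pattern -- write the port-space
 * offset to PCIE_INDEX, then move data through PCIE_DATA; the throwaway
 * reads post each write before the lock is dropped.  A read-modify-write
 * through this window would look roughly like this (PCIE_PORT_REG and
 * PCIE_PORT_BIT are hypothetical placeholders, not real definitions):
 */
#if 0	/* illustration only */
	u32 tmp;

	tmp = cik_pciep_rreg(rdev, PCIE_PORT_REG);	/* hypothetical reg */
	tmp |= PCIE_PORT_BIT;				/* hypothetical bit */
	cik_pciep_wreg(rdev, PCIE_PORT_REG, tmp);
#endif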

static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
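
/*
 * Editorial note: the driver never interprets the two tables above itself;
 * they are copied verbatim into the RLC save/restore buffer (see
 * sumo_rlc_init()), and the RLC firmware walks them.  Each
 * "(sel << 16) | (reg >> 2)" word carries a selector in the high half and a
 * register dword offset in the low half; the 0x00000000 following most
 * entries looks like a placeholder slot, and the bare 0x3/0x5 words appear
 * to be section markers -- the exact encoding belongs to the RLC ucode, not
 * to this file.  The copy itself is roughly:
 */
#if 0	/* illustration only, sketch of the copy loop in sumo_rlc_init() */
	for (i = 0; i < rdev->rlc.reg_list_size; i++)
		dst_ptr[i] = cpu_to_le32(src_ptr[i]);
#endif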

static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
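
/*
 * Editorial note: every "golden" table row above and below is a
 * (reg, and_mask, or_value) triple consumed by
 * radeon_program_register_sequence(), which applies a masked
 * read-modify-write -- roughly the following sketch (paraphrased, not the
 * verbatim radeon_device.c implementation):
 */
#if 0	/* illustration only */
	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff)
			tmp = or_mask;	/* full-width overwrite */
		else
			tmp = (RREG32(reg) & ~and_mask) | (or_mask & and_mask);
		WREG32(reg, tmp);
	}
#endif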

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
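	/* editorial note: 0x98302 above looks like a typo for 0x9834 (the
	 * other golden tables pair this mask with 0x9834), but it matches
	 * the upstream source and is left unchanged here */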
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
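
/*
 * Editorial note: rings that are kicked through doorbells (compute, SDMA)
 * write their ring write pointer through the helpers above, via the
 * RDOORBELL32()/WDOORBELL32() wrappers in radeon.h.  A set_wptr callback
 * for such a ring boils down to:
 */
#if 0	/* illustration only */
	WDOORBELL32(ring->doorbell_index, ring->wptr);
#endif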

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
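
/*
 * Editorial note: callers bracket programming of instanced registers with
 * a select/deselect pair under rdev->srbm_mutex, so only one
 * me/pipe/queue/vmid instance is visible at a time.  The usual shape is:
 */
#if 0	/* illustration only */
	mutex_lock(&rdev->srbm_mutex);
	cik_srbm_select(rdev, me, pipe, queue, vmid);
	/* ... program the instanced (e.g. compute HQD) registers ... */
	cik_srbm_select(rdev, 0, 0, 0, 0);	/* back to defaults */
	mutex_unlock(&rdev->srbm_mutex);
#endif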
1751 
1752 /* ucode loading */
1753 /**
1754  * ci_mc_load_microcode - load MC ucode into the hw
1755  *
1756  * @rdev: radeon_device pointer
1757  *
1758  * Load the GDDR MC ucode into the hw (CIK).
1759  * Returns 0 on success, error on failure.
1760  */
1761 int ci_mc_load_microcode(struct radeon_device *rdev)
1762 {
1763 	const __be32 *fw_data;
1764 	u32 running, blackout = 0;
1765 	u32 *io_mc_regs;
1766 	int i, regs_size, ucode_size;
1767 
1768 	if (!rdev->mc_fw)
1769 		return -EINVAL;
1770 
1771 	ucode_size = rdev->mc_fw->size / 4;
1772 
1773 	switch (rdev->family) {
1774 	case CHIP_BONAIRE:
1775 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1776 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1777 		break;
1778 	case CHIP_HAWAII:
1779 		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1780 		regs_size = HAWAII_IO_MC_REGS_SIZE;
1781 		break;
1782 	default:
1783 		return -EINVAL;
1784 	}
1785 
1786 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1787 
1788 	if (running == 0) {
1794 		/* reset the engine and set to writable */
1795 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1796 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1797 
1798 		/* load mc io regs */
1799 		for (i = 0; i < regs_size; i++) {
1800 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1801 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1802 		}
1803 		/* load the MC ucode */
1804 		fw_data = (const __be32 *)rdev->mc_fw->data;
1805 		for (i = 0; i < ucode_size; i++)
1806 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1807 
1808 		/* put the engine back into the active state */
1809 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1810 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1811 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1812 
1813 		/* wait for training to complete */
1814 		for (i = 0; i < rdev->usec_timeout; i++) {
1815 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1816 				break;
1817 			udelay(1);
1818 		}
1819 		for (i = 0; i < rdev->usec_timeout; i++) {
1820 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1821 				break;
1822 			udelay(1);
1823 		}
1827 	}
1828 
1829 	return 0;
1830 }
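
/* Note on the sequence above: the load only proceeds when the MC sequencer
 * is idle (RUN_MASK clear), and the ucode words are stored big-endian on
 * disk, hence the be32_to_cpup() swizzle on each write to MC_SEQ_SUP_PGM.
 */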
1831 
1832 /**
1833  * cik_init_microcode - load ucode images from disk
1834  *
1835  * @rdev: radeon_device pointer
1836  *
1837  * Use the firmware interface to load the ucode images into
1838  * the driver (not loaded into hw).
1839  * Returns 0 on success, error on failure.
1840  */
1841 static int cik_init_microcode(struct radeon_device *rdev)
1842 {
1843 	const char *chip_name;
1844 	size_t pfp_req_size, me_req_size, ce_req_size,
1845 		mec_req_size, rlc_req_size, mc_req_size = 0,
1846 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1847 	char fw_name[30];
1848 	int err;
1849 
1850 	DRM_DEBUG("\n");
1851 
1852 	switch (rdev->family) {
1853 	case CHIP_BONAIRE:
1854 		chip_name = "BONAIRE";
1855 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1856 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1857 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1858 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1859 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1860 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1861 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1862 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1863 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1864 		break;
1865 	case CHIP_HAWAII:
1866 		chip_name = "HAWAII";
1867 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1868 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1869 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1870 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1871 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1872 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1873 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1874 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1875 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1876 		break;
1877 	case CHIP_KAVERI:
1878 		chip_name = "KAVERI";
1879 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1880 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1881 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1882 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1883 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1884 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1885 		break;
1886 	case CHIP_KABINI:
1887 		chip_name = "KABINI";
1888 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1889 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1890 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1891 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1892 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1893 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1894 		break;
1895 	case CHIP_MULLINS:
1896 		chip_name = "MULLINS";
1897 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1898 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1899 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1900 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1901 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1902 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1903 		break;
1904 	default:
		BUG();
1905 	}
1906 
1907 	DRM_INFO("Loading %s Microcode\n", chip_name);
1908 
1909 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1910 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1911 	if (err)
1912 		goto out;
1913 	if (rdev->pfp_fw->size != pfp_req_size) {
1914 		printk(KERN_ERR
1915 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1916 		       rdev->pfp_fw->size, fw_name);
1917 		err = -EINVAL;
1918 		goto out;
1919 	}
1920 
1921 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1922 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1923 	if (err)
1924 		goto out;
1925 	if (rdev->me_fw->size != me_req_size) {
1926 		printk(KERN_ERR
1927 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1928 		       rdev->me_fw->size, fw_name);
1929 		err = -EINVAL;
		goto out;
1930 	}
1931 
1932 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1933 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1934 	if (err)
1935 		goto out;
1936 	if (rdev->ce_fw->size != ce_req_size) {
1937 		printk(KERN_ERR
1938 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1939 		       rdev->ce_fw->size, fw_name);
1940 		err = -EINVAL;
		goto out;
1941 	}
1942 
1943 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1944 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1945 	if (err)
1946 		goto out;
1947 	if (rdev->mec_fw->size != mec_req_size) {
1948 		printk(KERN_ERR
1949 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1950 		       rdev->mec_fw->size, fw_name);
1951 		err = -EINVAL;
		goto out;
1952 	}
1953 
1954 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1955 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1956 	if (err)
1957 		goto out;
1958 	if (rdev->rlc_fw->size != rlc_req_size) {
1959 		printk(KERN_ERR
1960 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1961 		       rdev->rlc_fw->size, fw_name);
1962 		err = -EINVAL;
		goto out;
1963 	}
1964 
1965 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1966 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1967 	if (err)
1968 		goto out;
1969 	if (rdev->sdma_fw->size != sdma_req_size) {
1970 		printk(KERN_ERR
1971 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1972 		       rdev->sdma_fw->size, fw_name);
1973 		err = -EINVAL;
		goto out;
1974 	}
1975 
1976 	/* No SMC, MC ucode on APUs */
1977 	if (!(rdev->flags & RADEON_IS_IGP)) {
1978 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1979 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1980 		if (err) {
1981 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1982 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1983 			if (err)
1984 				goto out;
1985 		}
1986 		if ((rdev->mc_fw->size != mc_req_size) &&
1987 		    (rdev->mc_fw->size != mc2_req_size)){
1988 			printk(KERN_ERR
1989 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1990 			       rdev->mc_fw->size, fw_name);
1991 			err = -EINVAL;
			goto out;
1992 		}
1993 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1994 
1995 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1996 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1997 		if (err) {
1998 			printk(KERN_ERR
1999 			       "smc: error loading firmware \"%s\"\n",
2000 			       fw_name);
2001 			release_firmware(rdev->smc_fw);
2002 			rdev->smc_fw = NULL;
2003 			err = 0;
2004 		} else if (rdev->smc_fw->size != smc_req_size) {
2005 			printk(KERN_ERR
2006 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2007 			       rdev->smc_fw->size, fw_name);
2008 			err = -EINVAL;
2009 		}
2010 	}
2011 
2012 out:
2013 	if (err) {
2014 		if (err != -EINVAL)
2015 			printk(KERN_ERR
2016 			       "cik_cp: Failed to load firmware \"%s\"\n",
2017 			       fw_name);
2018 		release_firmware(rdev->pfp_fw);
2019 		rdev->pfp_fw = NULL;
2020 		release_firmware(rdev->me_fw);
2021 		rdev->me_fw = NULL;
2022 		release_firmware(rdev->ce_fw);
2023 		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
2024 		release_firmware(rdev->rlc_fw);
2025 		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
2026 		release_firmware(rdev->mc_fw);
2027 		rdev->mc_fw = NULL;
2028 		release_firmware(rdev->smc_fw);
2029 		rdev->smc_fw = NULL;
2030 	}
2031 	return err;
2032 }
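
/* For reference: the names requested above expand to radeon/<CHIP>_<block>.bin
 * (e.g. "radeon/BONAIRE_pfp.bin"), which request_firmware() resolves through
 * the usual firmware search path (commonly /lib/firmware, though the exact
 * location is distribution-specific).
 */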
2033 
2034 /*
2035  * Core functions
2036  */
2037 /**
2038  * cik_tiling_mode_table_init - init the hw tiling table
2039  *
2040  * @rdev: radeon_device pointer
2041  *
2042  * Starting with SI, the tiling setup is done globally in a
2043  * set of 32 tiling modes.  Rather than selecting each set of
2044  * parameters per surface as on older asics, we just select
2045  * which index in the tiling table we want to use, and the
2046  * surface uses those parameters (CIK).
2047  */
2048 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2049 {
2050 	const u32 num_tile_mode_states = 32;
2051 	const u32 num_secondary_tile_mode_states = 16;
2052 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2053 	u32 num_pipe_configs;
2054 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2055 		rdev->config.cik.max_shader_engines;
2056 
2057 	switch (rdev->config.cik.mem_row_size_in_kb) {
2058 	case 1:
2059 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2060 		break;
2061 	case 2:
2062 	default:
2063 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2064 		break;
2065 	case 4:
2066 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2067 		break;
2068 	}
2069 
2070 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2071 	if (num_pipe_configs > 8)
2072 		num_pipe_configs = 16;
2073 
2074 	if (num_pipe_configs == 16) {
2075 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2076 			switch (reg_offset) {
2077 			case 0:
2078 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2079 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2080 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2081 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2082 				break;
2083 			case 1:
2084 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2085 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2086 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2087 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2088 				break;
2089 			case 2:
2090 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2092 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2093 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2094 				break;
2095 			case 3:
2096 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2097 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2098 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2099 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2100 				break;
2101 			case 4:
2102 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2103 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2104 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2105 						 TILE_SPLIT(split_equal_to_row_size));
2106 				break;
2107 			case 5:
2108 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2109 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2110 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2111 				break;
2112 			case 6:
2113 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2114 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2115 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2116 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2117 				break;
2118 			case 7:
2119 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2120 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2121 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2122 						 TILE_SPLIT(split_equal_to_row_size));
2123 				break;
2124 			case 8:
2125 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2126 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2127 				break;
2128 			case 9:
2129 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2130 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2131 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2132 				break;
2133 			case 10:
2134 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2136 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2137 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138 				break;
2139 			case 11:
2140 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2141 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2142 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2143 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2144 				break;
2145 			case 12:
2146 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2147 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2148 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2149 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2150 				break;
2151 			case 13:
2152 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2153 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2154 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2155 				break;
2156 			case 14:
2157 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2158 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2159 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2160 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2161 				break;
2162 			case 16:
2163 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2164 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2165 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2166 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167 				break;
2168 			case 17:
2169 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2170 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2172 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2173 				break;
2174 			case 27:
2175 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2176 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2177 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2178 				break;
2179 			case 28:
2180 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2182 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2183 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184 				break;
2185 			case 29:
2186 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2187 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2188 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2189 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190 				break;
2191 			case 30:
2192 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2193 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2194 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2195 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196 				break;
2197 			default:
2198 				gb_tile_moden = 0;
2199 				break;
2200 			}
2201 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2202 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2203 		}
2204 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2205 			switch (reg_offset) {
2206 			case 0:
2207 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2208 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2209 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2210 						 NUM_BANKS(ADDR_SURF_16_BANK));
2211 				break;
2212 			case 1:
2213 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2214 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2215 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2216 						 NUM_BANKS(ADDR_SURF_16_BANK));
2217 				break;
2218 			case 2:
2219 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2220 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2221 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2222 						 NUM_BANKS(ADDR_SURF_16_BANK));
2223 				break;
2224 			case 3:
2225 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2226 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2227 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2228 						 NUM_BANKS(ADDR_SURF_16_BANK));
2229 				break;
2230 			case 4:
2231 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2233 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2234 						 NUM_BANKS(ADDR_SURF_8_BANK));
2235 				break;
2236 			case 5:
2237 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2239 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2240 						 NUM_BANKS(ADDR_SURF_4_BANK));
2241 				break;
2242 			case 6:
2243 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2246 						 NUM_BANKS(ADDR_SURF_2_BANK));
2247 				break;
2248 			case 8:
2249 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2252 						 NUM_BANKS(ADDR_SURF_16_BANK));
2253 				break;
2254 			case 9:
2255 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2257 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 						 NUM_BANKS(ADDR_SURF_16_BANK));
2259 				break;
2260 			case 10:
2261 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2264 						 NUM_BANKS(ADDR_SURF_16_BANK));
2265 				break;
2266 			case 11:
2267 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2270 						 NUM_BANKS(ADDR_SURF_8_BANK));
2271 				break;
2272 			case 12:
2273 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2276 						 NUM_BANKS(ADDR_SURF_4_BANK));
2277 				break;
2278 			case 13:
2279 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2282 						 NUM_BANKS(ADDR_SURF_2_BANK));
2283 				break;
2284 			case 14:
2285 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2287 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2288 						 NUM_BANKS(ADDR_SURF_2_BANK));
2289 				break;
2290 			default:
2291 				gb_tile_moden = 0;
2292 				break;
2293 			}
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2294 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2295 		}
2296 	} else if (num_pipe_configs == 8) {
2297 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2298 			switch (reg_offset) {
2299 			case 0:
2300 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2301 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2302 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2303 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2304 				break;
2305 			case 1:
2306 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2308 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2310 				break;
2311 			case 2:
2312 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2313 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2314 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2315 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2316 				break;
2317 			case 3:
2318 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2320 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2321 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2322 				break;
2323 			case 4:
2324 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2326 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2327 						 TILE_SPLIT(split_equal_to_row_size));
2328 				break;
2329 			case 5:
2330 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2331 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2332 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2333 				break;
2334 			case 6:
2335 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2336 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2337 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2338 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2339 				break;
2340 			case 7:
2341 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2342 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2343 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2344 						 TILE_SPLIT(split_equal_to_row_size));
2345 				break;
2346 			case 8:
2347 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2348 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2349 				break;
2350 			case 9:
2351 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2352 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2353 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2354 				break;
2355 			case 10:
2356 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2358 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2359 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360 				break;
2361 			case 11:
2362 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2363 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2365 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2366 				break;
2367 			case 12:
2368 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2369 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2370 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2371 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2372 				break;
2373 			case 13:
2374 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2375 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2376 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2377 				break;
2378 			case 14:
2379 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2380 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2381 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2382 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2383 				break;
2384 			case 16:
2385 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2386 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2388 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2389 				break;
2390 			case 17:
2391 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2392 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2393 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2394 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2395 				break;
2396 			case 27:
2397 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2398 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2399 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2400 				break;
2401 			case 28:
2402 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2404 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2405 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406 				break;
2407 			case 29:
2408 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2409 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2410 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2411 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412 				break;
2413 			case 30:
2414 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2416 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2417 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418 				break;
2419 			default:
2420 				gb_tile_moden = 0;
2421 				break;
2422 			}
2423 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2424 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2425 		}
2426 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2427 			switch (reg_offset) {
2428 			case 0:
2429 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2431 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2432 						 NUM_BANKS(ADDR_SURF_16_BANK));
2433 				break;
2434 			case 1:
2435 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2437 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2438 						 NUM_BANKS(ADDR_SURF_16_BANK));
2439 				break;
2440 			case 2:
2441 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2443 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444 						 NUM_BANKS(ADDR_SURF_16_BANK));
2445 				break;
2446 			case 3:
2447 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2449 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2450 						 NUM_BANKS(ADDR_SURF_16_BANK));
2451 				break;
2452 			case 4:
2453 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456 						 NUM_BANKS(ADDR_SURF_8_BANK));
2457 				break;
2458 			case 5:
2459 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2461 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2462 						 NUM_BANKS(ADDR_SURF_4_BANK));
2463 				break;
2464 			case 6:
2465 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 						 NUM_BANKS(ADDR_SURF_2_BANK));
2469 				break;
2470 			case 8:
2471 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2473 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2474 						 NUM_BANKS(ADDR_SURF_16_BANK));
2475 				break;
2476 			case 9:
2477 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2480 						 NUM_BANKS(ADDR_SURF_16_BANK));
2481 				break;
2482 			case 10:
2483 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2485 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2486 						 NUM_BANKS(ADDR_SURF_16_BANK));
2487 				break;
2488 			case 11:
2489 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2492 						 NUM_BANKS(ADDR_SURF_16_BANK));
2493 				break;
2494 			case 12:
2495 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2498 						 NUM_BANKS(ADDR_SURF_8_BANK));
2499 				break;
2500 			case 13:
2501 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504 						 NUM_BANKS(ADDR_SURF_4_BANK));
2505 				break;
2506 			case 14:
2507 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2509 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2510 						 NUM_BANKS(ADDR_SURF_2_BANK));
2511 				break;
2512 			default:
2513 				gb_tile_moden = 0;
2514 				break;
2515 			}
2516 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2517 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2518 		}
2519 	} else if (num_pipe_configs == 4) {
2520 		if (num_rbs == 4) {
2521 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2522 				switch (reg_offset) {
2523 				case 0:
2524 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2527 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2528 					break;
2529 				case 1:
2530 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2531 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2532 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2533 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2534 					break;
2535 				case 2:
2536 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540 					break;
2541 				case 3:
2542 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2545 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2546 					break;
2547 				case 4:
2548 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2550 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551 							 TILE_SPLIT(split_equal_to_row_size));
2552 					break;
2553 				case 5:
2554 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2556 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2557 					break;
2558 				case 6:
2559 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2561 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2563 					break;
2564 				case 7:
2565 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2566 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2567 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2568 							 TILE_SPLIT(split_equal_to_row_size));
2569 					break;
2570 				case 8:
2571 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2572 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2573 					break;
2574 				case 9:
2575 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2576 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2577 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2578 					break;
2579 				case 10:
2580 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2581 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2582 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2583 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2584 					break;
2585 				case 11:
2586 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2587 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2588 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2589 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2590 					break;
2591 				case 12:
2592 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2593 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2594 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2595 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2596 					break;
2597 				case 13:
2598 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2599 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2600 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2601 					break;
2602 				case 14:
2603 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2605 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2606 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2607 					break;
2608 				case 16:
2609 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2611 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2612 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2613 					break;
2614 				case 17:
2615 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2616 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2617 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2618 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2619 					break;
2620 				case 27:
2621 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2622 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2623 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2624 					break;
2625 				case 28:
2626 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2627 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2628 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2629 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2630 					break;
2631 				case 29:
2632 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2633 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2634 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2635 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2636 					break;
2637 				case 30:
2638 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2640 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2642 					break;
2643 				default:
2644 					gb_tile_moden = 0;
2645 					break;
2646 				}
2647 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2648 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2649 			}
2650 		} else if (num_rbs < 4) {
2651 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2652 				switch (reg_offset) {
2653 				case 0:
2654 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2656 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2657 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2658 					break;
2659 				case 1:
2660 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2663 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2664 					break;
2665 				case 2:
2666 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2667 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2668 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2669 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2670 					break;
2671 				case 3:
2672 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2675 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2676 					break;
2677 				case 4:
2678 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2680 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2681 							 TILE_SPLIT(split_equal_to_row_size));
2682 					break;
2683 				case 5:
2684 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2685 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2686 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2687 					break;
2688 				case 6:
2689 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2690 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2691 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2692 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2693 					break;
2694 				case 7:
2695 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2698 							 TILE_SPLIT(split_equal_to_row_size));
2699 					break;
2700 				case 8:
2701 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2702 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2703 					break;
2704 				case 9:
2705 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2707 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2708 					break;
2709 				case 10:
2710 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2712 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2713 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714 					break;
2715 				case 11:
2716 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2717 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2719 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720 					break;
2721 				case 12:
2722 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2723 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2724 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2725 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 					break;
2727 				case 13:
2728 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2729 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2731 					break;
2732 				case 14:
2733 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2734 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2735 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2737 					break;
2738 				case 16:
2739 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2740 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2741 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2742 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743 					break;
2744 				case 17:
2745 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2746 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2747 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2749 					break;
2750 				case 27:
2751 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2752 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2753 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2754 					break;
2755 				case 28:
2756 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2757 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2758 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2760 					break;
2761 				case 29:
2762 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2763 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2764 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2765 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2766 					break;
2767 				case 30:
2768 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2769 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2770 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2772 					break;
2773 				default:
2774 					gb_tile_moden = 0;
2775 					break;
2776 				}
2777 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2778 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2779 			}
2780 		}
2781 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2782 			switch (reg_offset) {
2783 			case 0:
2784 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2786 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2787 						 NUM_BANKS(ADDR_SURF_16_BANK));
2788 				break;
2789 			case 1:
2790 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2792 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2793 						 NUM_BANKS(ADDR_SURF_16_BANK));
2794 				break;
2795 			case 2:
2796 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2797 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2798 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2799 						 NUM_BANKS(ADDR_SURF_16_BANK));
2800 				break;
2801 			case 3:
2802 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2804 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2805 						 NUM_BANKS(ADDR_SURF_16_BANK));
2806 				break;
2807 			case 4:
2808 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2810 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2811 						 NUM_BANKS(ADDR_SURF_16_BANK));
2812 				break;
2813 			case 5:
2814 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2815 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2816 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2817 						 NUM_BANKS(ADDR_SURF_8_BANK));
2818 				break;
2819 			case 6:
2820 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2823 						 NUM_BANKS(ADDR_SURF_4_BANK));
2824 				break;
2825 			case 8:
2826 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2827 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2828 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829 						 NUM_BANKS(ADDR_SURF_16_BANK));
2830 				break;
2831 			case 9:
2832 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 						 NUM_BANKS(ADDR_SURF_16_BANK));
2836 				break;
2837 			case 10:
2838 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2840 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2841 						 NUM_BANKS(ADDR_SURF_16_BANK));
2842 				break;
2843 			case 11:
2844 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847 						 NUM_BANKS(ADDR_SURF_16_BANK));
2848 				break;
2849 			case 12:
2850 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2852 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2853 						 NUM_BANKS(ADDR_SURF_16_BANK));
2854 				break;
2855 			case 13:
2856 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2859 						 NUM_BANKS(ADDR_SURF_8_BANK));
2860 				break;
2861 			case 14:
2862 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2864 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2865 						 NUM_BANKS(ADDR_SURF_4_BANK));
2866 				break;
2867 			default:
2868 				gb_tile_moden = 0;
2869 				break;
2870 			}
2871 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2872 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2873 		}
2874 	} else if (num_pipe_configs == 2) {
2875 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2876 			switch (reg_offset) {
2877 			case 0:
2878 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2880 						 PIPE_CONFIG(ADDR_SURF_P2) |
2881 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2882 				break;
2883 			case 1:
2884 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886 						 PIPE_CONFIG(ADDR_SURF_P2) |
2887 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2888 				break;
2889 			case 2:
2890 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2891 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2892 						 PIPE_CONFIG(ADDR_SURF_P2) |
2893 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2894 				break;
2895 			case 3:
2896 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898 						 PIPE_CONFIG(ADDR_SURF_P2) |
2899 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2900 				break;
2901 			case 4:
2902 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2904 						 PIPE_CONFIG(ADDR_SURF_P2) |
2905 						 TILE_SPLIT(split_equal_to_row_size));
2906 				break;
2907 			case 5:
2908 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2909 						 PIPE_CONFIG(ADDR_SURF_P2) |
2910 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911 				break;
2912 			case 6:
2913 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2914 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2915 						 PIPE_CONFIG(ADDR_SURF_P2) |
2916 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2917 				break;
2918 			case 7:
2919 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921 						 PIPE_CONFIG(ADDR_SURF_P2) |
2922 						 TILE_SPLIT(split_equal_to_row_size));
2923 				break;
2924 			case 8:
2925 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2926 						 PIPE_CONFIG(ADDR_SURF_P2));
2927 				break;
2928 			case 9:
2929 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2931 						 PIPE_CONFIG(ADDR_SURF_P2));
2932 				break;
2933 			case 10:
2934 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2935 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936 						 PIPE_CONFIG(ADDR_SURF_P2) |
2937 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938 				break;
2939 			case 11:
2940 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2941 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942 						 PIPE_CONFIG(ADDR_SURF_P2) |
2943 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 				break;
2945 			case 12:
2946 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2947 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2948 						 PIPE_CONFIG(ADDR_SURF_P2) |
2949 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950 				break;
2951 			case 13:
2952 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2953 						 PIPE_CONFIG(ADDR_SURF_P2) |
2954 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2955 				break;
2956 			case 14:
2957 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2959 						 PIPE_CONFIG(ADDR_SURF_P2) |
2960 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2961 				break;
2962 			case 16:
2963 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2964 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965 						 PIPE_CONFIG(ADDR_SURF_P2) |
2966 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967 				break;
2968 			case 17:
2969 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2970 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2971 						 PIPE_CONFIG(ADDR_SURF_P2) |
2972 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2973 				break;
2974 			case 27:
2975 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2976 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2977 						 PIPE_CONFIG(ADDR_SURF_P2));
2978 				break;
2979 			case 28:
2980 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2981 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2982 						 PIPE_CONFIG(ADDR_SURF_P2) |
2983 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2984 				break;
2985 			case 29:
2986 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2987 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2988 						 PIPE_CONFIG(ADDR_SURF_P2) |
2989 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2990 				break;
2991 			case 30:
2992 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2994 						 PIPE_CONFIG(ADDR_SURF_P2) |
2995 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2996 				break;
2997 			default:
2998 				gb_tile_moden = 0;
2999 				break;
3000 			}
3001 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3002 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3003 		}
3004 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3005 			switch (reg_offset) {
3006 			case 0:
3007 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3008 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010 						 NUM_BANKS(ADDR_SURF_16_BANK));
3011 				break;
3012 			case 1:
3013 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3014 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3015 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3016 						 NUM_BANKS(ADDR_SURF_16_BANK));
3017 				break;
3018 			case 2:
3019 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3021 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022 						 NUM_BANKS(ADDR_SURF_16_BANK));
3023 				break;
3024 			case 3:
3025 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3027 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028 						 NUM_BANKS(ADDR_SURF_16_BANK));
3029 				break;
3030 			case 4:
3031 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3033 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3034 						 NUM_BANKS(ADDR_SURF_16_BANK));
3035 				break;
3036 			case 5:
3037 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3038 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3039 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3040 						 NUM_BANKS(ADDR_SURF_16_BANK));
3041 				break;
3042 			case 6:
3043 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3045 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3046 						 NUM_BANKS(ADDR_SURF_8_BANK));
3047 				break;
3048 			case 8:
3049 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3050 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3051 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3052 						 NUM_BANKS(ADDR_SURF_16_BANK));
3053 				break;
3054 			case 9:
3055 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3056 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3057 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3058 						 NUM_BANKS(ADDR_SURF_16_BANK));
3059 				break;
3060 			case 10:
3061 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3062 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3063 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3064 						 NUM_BANKS(ADDR_SURF_16_BANK));
3065 				break;
3066 			case 11:
3067 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3068 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3069 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3070 						 NUM_BANKS(ADDR_SURF_16_BANK));
3071 				break;
3072 			case 12:
3073 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3075 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3076 						 NUM_BANKS(ADDR_SURF_16_BANK));
3077 				break;
3078 			case 13:
3079 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3082 						 NUM_BANKS(ADDR_SURF_16_BANK));
3083 				break;
3084 			case 14:
3085 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3086 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3087 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3088 						 NUM_BANKS(ADDR_SURF_8_BANK));
3089 				break;
3090 			default:
3091 				gb_tile_moden = 0;
3092 				break;
3093 			}
3094 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3095 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3096 		}
3097 	} else
3098 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3099 }
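
/* The tile_mode_array[] and macrotile_mode_array[] copies cached above are
 * what userspace queries through the RADEON_INFO ioctl, so that e.g. mesa
 * can refer to a table index rather than recomputing the tiling parameters;
 * see radeon_drm.h for the corresponding query tokens.
 */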
3100 
3101 /**
3102  * cik_select_se_sh - select which SE, SH to address
3103  *
3104  * @rdev: radeon_device pointer
3105  * @se_num: shader engine to address
3106  * @sh_num: sh block to address
3107  *
3108  * Select which SE, SH combinations to address. Certain
3109  * registers are instanced per SE or SH.  0xffffffff means
3110  * broadcast to all SEs or SHs (CIK).
3111  */
3112 static void cik_select_se_sh(struct radeon_device *rdev,
3113 			     u32 se_num, u32 sh_num)
3114 {
3115 	u32 data = INSTANCE_BROADCAST_WRITES;
3116 
3117 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3118 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3119 	else if (se_num == 0xffffffff)
3120 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3121 	else if (sh_num == 0xffffffff)
3122 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3123 	else
3124 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3125 	WREG32(GRBM_GFX_INDEX, data);
3126 }
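
/* Illustrative sketch of the usual pattern: narrow the window, touch the
 * instanced registers, then restore broadcast so later writes reach every
 * instance (as cik_setup_rb() below does):
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	... read/write per-SE/SH registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */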
3127 
3128 /**
3129  * cik_create_bitmask - create a bitmask
3130  *
3131  * @bit_width: length of the mask
3132  *
3133  * create a variable length bit mask (CIK).
3134  * Returns the bitmask.
3135  */
3136 static u32 cik_create_bitmask(u32 bit_width)
3137 {
3138 	u32 i, mask = 0;
3139 
3140 	for (i = 0; i < bit_width; i++) {
3141 		mask <<= 1;
3142 		mask |= 1;
3143 	}
3144 	return mask;
3145 }
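
/* Equivalently (illustrative): for bit_width < 32 this is just
 * (1u << bit_width) - 1, e.g. cik_create_bitmask(4) == 0xf.  The loop form
 * also stays well defined for bit_width == 32, where a single 32-bit shift
 * would be undefined behavior.
 */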
3146 
3147 /**
3148  * cik_get_rb_disabled - computes the mask of disabled RBs
3149  *
3150  * @rdev: radeon_device pointer
3151  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3153  * @sh_per_se: number of SH blocks per SE for the asic
3154  *
3155  * Calculates the bitmask of disabled RBs (CIK).
3156  * Returns the disabled RB bitmask.
3157  */
3158 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3159 			      u32 max_rb_num_per_se,
3160 			      u32 sh_per_se)
3161 {
3162 	u32 data, mask;
3163 
3164 	data = RREG32(CC_RB_BACKEND_DISABLE);
3165 	if (data & 1)
3166 		data &= BACKEND_DISABLE_MASK;
3167 	else
3168 		data = 0;
3169 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3170 
3171 	data >>= BACKEND_DISABLE_SHIFT;
3172 
3173 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3174 
3175 	return data & mask;
3176 }
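
/* Worked example with hypothetical numbers: for max_rb_num_per_se = 4 and
 * sh_per_se = 1 the mask is 0xf, so a raw disable field of 0x2 reports RB1
 * of the currently selected SE/SH as disabled.
 */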
3177 
3178 /**
3179  * cik_setup_rb - setup the RBs on the asic
3180  *
3181  * @rdev: radeon_device pointer
3182  * @se_num: number of SEs (shader engines) for the asic
3183  * @sh_per_se: number of SH blocks per SE for the asic
3184  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3185  *
3186  * Configures per-SE/SH RB registers (CIK).
3187  */
3188 static void cik_setup_rb(struct radeon_device *rdev,
3189 			 u32 se_num, u32 sh_per_se,
3190 			 u32 max_rb_num_per_se)
3191 {
3192 	int i, j;
3193 	u32 data, mask;
3194 	u32 disabled_rbs = 0;
3195 	u32 enabled_rbs = 0;
3196 
3197 	for (i = 0; i < se_num; i++) {
3198 		for (j = 0; j < sh_per_se; j++) {
3199 			cik_select_se_sh(rdev, i, j);
3200 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3201 			if (rdev->family == CHIP_HAWAII)
3202 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3203 			else
3204 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3205 		}
3206 	}
3207 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3208 
3209 	mask = 1;
3210 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3211 		if (!(disabled_rbs & mask))
3212 			enabled_rbs |= mask;
3213 		mask <<= 1;
3214 	}
3215 
3216 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3217 
3218 	for (i = 0; i < se_num; i++) {
3219 		cik_select_se_sh(rdev, i, 0xffffffff);
3220 		data = 0;
3221 		for (j = 0; j < sh_per_se; j++) {
3222 			switch (enabled_rbs & 3) {
3223 			case 0:
3224 				if (j == 0)
3225 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3226 				else
3227 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3228 				break;
3229 			case 1:
3230 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3231 				break;
3232 			case 2:
3233 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3234 				break;
3235 			case 3:
3236 			default:
3237 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3238 				break;
3239 			}
3240 			enabled_rbs >>= 2;
3241 		}
3242 		WREG32(PA_SC_RASTER_CONFIG, data);
3243 	}
3244 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3245 }
3246 
3247 /**
3248  * cik_gpu_init - setup the 3D engine
3249  *
3250  * @rdev: radeon_device pointer
3251  *
3252  * Configures the 3D engine and tiling configuration
3253  * registers so that the 3D engine is usable.
3254  */
3255 static void cik_gpu_init(struct radeon_device *rdev)
3256 {
3257 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3258 	u32 mc_shared_chmap, mc_arb_ramcfg;
3259 	u32 hdp_host_path_cntl;
3260 	u32 tmp;
3261 	int i, j, k;
3262 
3263 	switch (rdev->family) {
3264 	case CHIP_BONAIRE:
3265 		rdev->config.cik.max_shader_engines = 2;
3266 		rdev->config.cik.max_tile_pipes = 4;
3267 		rdev->config.cik.max_cu_per_sh = 7;
3268 		rdev->config.cik.max_sh_per_se = 1;
3269 		rdev->config.cik.max_backends_per_se = 2;
3270 		rdev->config.cik.max_texture_channel_caches = 4;
3271 		rdev->config.cik.max_gprs = 256;
3272 		rdev->config.cik.max_gs_threads = 32;
3273 		rdev->config.cik.max_hw_contexts = 8;
3274 
3275 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3276 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3277 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3278 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3279 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3280 		break;
3281 	case CHIP_HAWAII:
3282 		rdev->config.cik.max_shader_engines = 4;
3283 		rdev->config.cik.max_tile_pipes = 16;
3284 		rdev->config.cik.max_cu_per_sh = 11;
3285 		rdev->config.cik.max_sh_per_se = 1;
3286 		rdev->config.cik.max_backends_per_se = 4;
3287 		rdev->config.cik.max_texture_channel_caches = 16;
3288 		rdev->config.cik.max_gprs = 256;
3289 		rdev->config.cik.max_gs_threads = 32;
3290 		rdev->config.cik.max_hw_contexts = 8;
3291 
3292 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3293 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3294 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3295 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3296 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3297 		break;
3298 	case CHIP_KAVERI:
3299 		rdev->config.cik.max_shader_engines = 1;
3300 		rdev->config.cik.max_tile_pipes = 4;
3301 		if ((rdev->pdev->device == 0x1304) ||
3302 		    (rdev->pdev->device == 0x1305) ||
3303 		    (rdev->pdev->device == 0x130C) ||
3304 		    (rdev->pdev->device == 0x130F) ||
3305 		    (rdev->pdev->device == 0x1310) ||
3306 		    (rdev->pdev->device == 0x1311) ||
3307 		    (rdev->pdev->device == 0x131C)) {
3308 			rdev->config.cik.max_cu_per_sh = 8;
3309 			rdev->config.cik.max_backends_per_se = 2;
3310 		} else if ((rdev->pdev->device == 0x1309) ||
3311 			   (rdev->pdev->device == 0x130A) ||
3312 			   (rdev->pdev->device == 0x130D) ||
3313 			   (rdev->pdev->device == 0x1313) ||
3314 			   (rdev->pdev->device == 0x131D)) {
3315 			rdev->config.cik.max_cu_per_sh = 6;
3316 			rdev->config.cik.max_backends_per_se = 2;
3317 		} else if ((rdev->pdev->device == 0x1306) ||
3318 			   (rdev->pdev->device == 0x1307) ||
3319 			   (rdev->pdev->device == 0x130B) ||
3320 			   (rdev->pdev->device == 0x130E) ||
3321 			   (rdev->pdev->device == 0x1315) ||
3322 			   (rdev->pdev->device == 0x131B)) {
3323 			rdev->config.cik.max_cu_per_sh = 4;
3324 			rdev->config.cik.max_backends_per_se = 1;
3325 		} else {
3326 			rdev->config.cik.max_cu_per_sh = 3;
3327 			rdev->config.cik.max_backends_per_se = 1;
3328 		}
3329 		rdev->config.cik.max_sh_per_se = 1;
3330 		rdev->config.cik.max_texture_channel_caches = 4;
3331 		rdev->config.cik.max_gprs = 256;
3332 		rdev->config.cik.max_gs_threads = 16;
3333 		rdev->config.cik.max_hw_contexts = 8;
3334 
3335 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3336 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3337 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3338 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3339 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3340 		break;
3341 	case CHIP_KABINI:
3342 	case CHIP_MULLINS:
3343 	default:
3344 		rdev->config.cik.max_shader_engines = 1;
3345 		rdev->config.cik.max_tile_pipes = 2;
3346 		rdev->config.cik.max_cu_per_sh = 2;
3347 		rdev->config.cik.max_sh_per_se = 1;
3348 		rdev->config.cik.max_backends_per_se = 1;
3349 		rdev->config.cik.max_texture_channel_caches = 2;
3350 		rdev->config.cik.max_gprs = 256;
3351 		rdev->config.cik.max_gs_threads = 16;
3352 		rdev->config.cik.max_hw_contexts = 8;
3353 
3354 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3355 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3356 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3357 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3358 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3359 		break;
3360 	}
3361 
3362 	/* Initialize HDP */
3363 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3364 		WREG32((0x2c14 + j), 0x00000000);
3365 		WREG32((0x2c18 + j), 0x00000000);
3366 		WREG32((0x2c1c + j), 0x00000000);
3367 		WREG32((0x2c20 + j), 0x00000000);
3368 		WREG32((0x2c24 + j), 0x00000000);
3369 	}
3370 
3371 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3372 
3373 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3374 
3375 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3376 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3377 
3378 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3379 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3380 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3381 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3382 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3383 		rdev->config.cik.mem_row_size_in_kb = 4;
3384 	/* XXX use MC settings? */
3385 	rdev->config.cik.shader_engine_tile_size = 32;
3386 	rdev->config.cik.num_gpus = 1;
3387 	rdev->config.cik.multi_gpu_tile_size = 64;
3388 
3389 	/* fix up row size */
3390 	gb_addr_config &= ~ROW_SIZE_MASK;
3391 	switch (rdev->config.cik.mem_row_size_in_kb) {
3392 	case 1:
3393 	default:
3394 		gb_addr_config |= ROW_SIZE(0);
3395 		break;
3396 	case 2:
3397 		gb_addr_config |= ROW_SIZE(1);
3398 		break;
3399 	case 4:
3400 		gb_addr_config |= ROW_SIZE(2);
3401 		break;
3402 	}
3403 
3404 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3405 	 * not have bank info, so create a custom tiling dword.
3406 	 * bits 3:0   num_pipes
3407 	 * bits 7:4   num_banks
3408 	 * bits 11:8  group_size
3409 	 * bits 15:12 row_size
3410 	 */
3411 	rdev->config.cik.tile_config = 0;
3412 	switch (rdev->config.cik.num_tile_pipes) {
3413 	case 1:
3414 		rdev->config.cik.tile_config |= (0 << 0);
3415 		break;
3416 	case 2:
3417 		rdev->config.cik.tile_config |= (1 << 0);
3418 		break;
3419 	case 4:
3420 		rdev->config.cik.tile_config |= (2 << 0);
3421 		break;
3422 	case 8:
3423 	default:
3424 		/* XXX what about 12? */
3425 		rdev->config.cik.tile_config |= (3 << 0);
3426 		break;
3427 	}
3428 	rdev->config.cik.tile_config |=
3429 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3430 	rdev->config.cik.tile_config |=
3431 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3432 	rdev->config.cik.tile_config |=
3433 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
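	/*
	 * Illustration: with pipe code p (from the switch above), bank field
	 * b, pipe-interleave field g and row-size field r, the packed dword
	 * is p | (b << 4) | (g << 8) | (r << 12), matching the bit layout
	 * documented above.
	 */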
3434 
3435 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3436 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3437 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3438 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3439 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3440 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3441 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3442 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3443 
3444 	cik_tiling_mode_table_init(rdev);
3445 
3446 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3447 		     rdev->config.cik.max_sh_per_se,
3448 		     rdev->config.cik.max_backends_per_se);
3449 
3450 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3451 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3452 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
3453 				rdev->config.cik.active_cus +=
3454 					hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3455 			}
3456 		}
3457 	}
3458 
3459 	/* set HW defaults for 3D engine */
3460 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3461 
3462 	WREG32(SX_DEBUG_1, 0x20);
3463 
3464 	WREG32(TA_CNTL_AUX, 0x00010000);
3465 
3466 	tmp = RREG32(SPI_CONFIG_CNTL);
3467 	tmp |= 0x03000000;
3468 	WREG32(SPI_CONFIG_CNTL, tmp);
3469 
3470 	WREG32(SQ_CONFIG, 1);
3471 
3472 	WREG32(DB_DEBUG, 0);
3473 
3474 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3475 	tmp |= 0x00000400;
3476 	WREG32(DB_DEBUG2, tmp);
3477 
3478 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3479 	tmp |= 0x00020200;
3480 	WREG32(DB_DEBUG3, tmp);
3481 
3482 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3483 	tmp |= 0x00018208;
3484 	WREG32(CB_HW_CONTROL, tmp);
3485 
3486 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3487 
3488 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3489 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3490 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3491 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3492 
3493 	WREG32(VGT_NUM_INSTANCES, 1);
3494 
3495 	WREG32(CP_PERFMON_CNTL, 0);
3496 
3497 	WREG32(SQ_CONFIG, 0);
3498 
3499 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3500 					  FORCE_EOV_MAX_REZ_CNT(255)));
3501 
3502 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3503 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3504 
3505 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3506 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3507 
3508 	tmp = RREG32(HDP_MISC_CNTL);
3509 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3510 	WREG32(HDP_MISC_CNTL, tmp);
3511 
3512 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3513 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3514 
3515 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3516 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3517 
3518 	udelay(50);
3519 }
3520 
3521 /*
3522  * GPU scratch registers helpers function.
3523  */
3524 /**
3525  * cik_scratch_init - setup driver info for CP scratch regs
3526  *
3527  * @rdev: radeon_device pointer
3528  *
3529  * Set up the number and offset of the CP scratch registers.
3530  * NOTE: use of CP scratch registers is a legacy interface and
3531  * is not used by default on newer asics (r6xx+); on those asics,
3532  * memory buffers are used for fences rather than scratch regs.
3533  */
3534 static void cik_scratch_init(struct radeon_device *rdev)
3535 {
3536 	int i;
3537 
3538 	rdev->scratch.num_reg = 7;
3539 	rdev->scratch.reg_base = SCRATCH_REG0;
3540 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3541 		rdev->scratch.free[i] = true;
3542 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3543 	}
3544 }
3545 
3546 /**
3547  * cik_ring_test - basic gfx ring test
3548  *
3549  * @rdev: radeon_device pointer
3550  * @ring: radeon_ring structure holding ring information
3551  *
3552  * Allocate a scratch register and write to it using the gfx ring (CIK).
3553  * Provides a basic gfx ring test to verify that the ring is working.
3554  * Used by cik_cp_gfx_resume().
3555  * Returns 0 on success, error on failure.
3556  */
3557 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3558 {
3559 	uint32_t scratch;
3560 	uint32_t tmp = 0;
3561 	unsigned i;
3562 	int r;
3563 
3564 	r = radeon_scratch_get(rdev, &scratch);
3565 	if (r) {
3566 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3567 		return r;
3568 	}
3569 	WREG32(scratch, 0xCAFEDEAD);
3570 	r = radeon_ring_lock(rdev, ring, 3);
3571 	if (r) {
3572 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3573 		radeon_scratch_free(rdev, scratch);
3574 		return r;
3575 	}
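	/*
	 * The three dwords reserved above form a single SET_UCONFIG_REG
	 * packet: header, dword offset of the scratch register relative to
	 * the UCONFIG base, and the value to write.
	 */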
3576 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3577 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3578 	radeon_ring_write(ring, 0xDEADBEEF);
3579 	radeon_ring_unlock_commit(rdev, ring);
3580 
3581 	for (i = 0; i < rdev->usec_timeout; i++) {
3582 		tmp = RREG32(scratch);
3583 		if (tmp == 0xDEADBEEF)
3584 			break;
3585 		DRM_UDELAY(1);
3586 	}
3587 	if (i < rdev->usec_timeout) {
3588 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3589 	} else {
3590 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3591 			  ring->idx, scratch, tmp);
3592 		r = -EINVAL;
3593 	}
3594 	radeon_scratch_free(rdev, scratch);
3595 	return r;
3596 }
3597 
3598 /**
3599  * cik_hdp_flush_cp_ring_emit - emit an HDP flush on the CP
3600  *
3601  * @rdev: radeon_device pointer
3602  * @ridx: radeon ring index
3603  *
3604  * Emits an HDP flush on the CP.
3605  */
3606 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3607 				       int ridx)
3608 {
3609 	struct radeon_ring *ring = &rdev->ring[ridx];
3610 	u32 ref_and_mask;
3611 
3612 	switch (ring->idx) {
3613 	case CAYMAN_RING_TYPE_CP1_INDEX:
3614 	case CAYMAN_RING_TYPE_CP2_INDEX:
3615 	default:
3616 		switch (ring->me) {
3617 		case 0:
3618 			ref_and_mask = CP2 << ring->pipe;
3619 			break;
3620 		case 1:
3621 			ref_and_mask = CP6 << ring->pipe;
3622 			break;
3623 		default:
3624 			return;
3625 		}
3626 		break;
3627 	case RADEON_RING_TYPE_GFX_INDEX:
3628 		ref_and_mask = CP0;
3629 		break;
3630 	}
3631 
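	/*
	 * WAIT_REG_MEM in "write, wait, write" mode: the CP writes
	 * ref_and_mask to GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE
	 * against the same mask until the HDP flush for this client is done.
	 */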
3632 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3633 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3634 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3635 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3636 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3637 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3638 	radeon_ring_write(ring, ref_and_mask);
3639 	radeon_ring_write(ring, ref_and_mask);
3640 	radeon_ring_write(ring, 0x20); /* poll interval */
3641 }
3642 
3643 /**
3644  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3645  *
3646  * @rdev: radeon_device pointer
3647  * @fence: radeon fence object
3648  *
3649  * Emits a fence sequence number on the gfx ring and flushes
3650  * GPU caches.
3651  */
3652 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3653 			     struct radeon_fence *fence)
3654 {
3655 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3656 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3657 
3658 	/* EVENT_WRITE_EOP - flush caches, send int */
3659 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3660 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3661 				 EOP_TC_ACTION_EN |
3662 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3663 				 EVENT_INDEX(5)));
3664 	radeon_ring_write(ring, addr & 0xfffffffc);
3665 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3666 	radeon_ring_write(ring, fence->seq);
3667 	radeon_ring_write(ring, 0);
3668 	/* HDP flush */
3669 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3670 }
3671 
3672 /**
3673  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3674  *
3675  * @rdev: radeon_device pointer
3676  * @fence: radeon fence object
3677  *
3678  * Emits a fence sequence number on the compute ring and flushes
3679  * GPU caches.
3680  */
3681 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3682 				 struct radeon_fence *fence)
3683 {
3684 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3685 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3686 
3687 	/* RELEASE_MEM - flush caches, send int */
3688 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3689 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3690 				 EOP_TC_ACTION_EN |
3691 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3692 				 EVENT_INDEX(5)));
3693 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3694 	radeon_ring_write(ring, addr & 0xfffffffc);
3695 	radeon_ring_write(ring, upper_32_bits(addr));
3696 	radeon_ring_write(ring, fence->seq);
3697 	radeon_ring_write(ring, 0);
3698 	/* HDP flush */
3699 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3700 }
3701 
3702 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3703 			     struct radeon_ring *ring,
3704 			     struct radeon_semaphore *semaphore,
3705 			     bool emit_wait)
3706 {
3707 	uint64_t addr = semaphore->gpu_addr;
3708 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3709 
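	/*
	 * The wait/signal select lives in the high bits of the second address
	 * dword, so it is ORed into the same write as the upper address (of
	 * which only bits 15:0 are kept).
	 */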
3710 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3711 	radeon_ring_write(ring, lower_32_bits(addr));
3712 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3713 
3714 	return true;
3715 }
3716 
3717 /**
3718  * cik_copy_cpdma - copy pages using the CP DMA engine
3719  *
3720  * @rdev: radeon_device pointer
3721  * @src_offset: src GPU address
3722  * @dst_offset: dst GPU address
3723  * @num_gpu_pages: number of GPU pages to xfer
3724  * @fence: radeon fence object
3725  *
3726  * Copy GPU pages using the CP DMA engine (CIK+).
3727  * Used by the radeon ttm implementation to move pages if
3728  * registered as the asic copy callback.
3729  */
3730 int cik_copy_cpdma(struct radeon_device *rdev,
3731 		   uint64_t src_offset, uint64_t dst_offset,
3732 		   unsigned num_gpu_pages,
3733 		   struct radeon_fence **fence)
3734 {
3735 	struct radeon_semaphore *sem = NULL;
3736 	int ring_index = rdev->asic->copy.blit_ring_index;
3737 	struct radeon_ring *ring = &rdev->ring[ring_index];
3738 	u32 size_in_bytes, cur_size_in_bytes, control;
3739 	int i, num_loops;
3740 	int r = 0;
3741 
3742 	r = radeon_semaphore_create(rdev, &sem);
3743 	if (r) {
3744 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3745 		return r;
3746 	}
3747 
3748 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3749 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
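	/*
	 * Each DMA_DATA packet moves at most 0x1fffff bytes.  Worked example:
	 * an 8 MiB copy (0x800000 bytes) needs DIV_ROUND_UP(0x800000,
	 * 0x1fffff) = 5 packets, hence the num_loops * 7 + 18 dwords
	 * reserved below (7 dwords per packet plus sync/fence overhead).
	 */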
3750 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3751 	if (r) {
3752 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3753 		radeon_semaphore_free(rdev, &sem, NULL);
3754 		return r;
3755 	}
3756 
3757 	radeon_semaphore_sync_to(sem, *fence);
3758 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3759 
3760 	for (i = 0; i < num_loops; i++) {
3761 		cur_size_in_bytes = size_in_bytes;
3762 		if (cur_size_in_bytes > 0x1fffff)
3763 			cur_size_in_bytes = 0x1fffff;
3764 		size_in_bytes -= cur_size_in_bytes;
3765 		control = 0;
3766 		if (size_in_bytes == 0)
3767 			control |= PACKET3_DMA_DATA_CP_SYNC;
3768 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3769 		radeon_ring_write(ring, control);
3770 		radeon_ring_write(ring, lower_32_bits(src_offset));
3771 		radeon_ring_write(ring, upper_32_bits(src_offset));
3772 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3773 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3774 		radeon_ring_write(ring, cur_size_in_bytes);
3775 		src_offset += cur_size_in_bytes;
3776 		dst_offset += cur_size_in_bytes;
3777 	}
3778 
3779 	r = radeon_fence_emit(rdev, fence, ring->idx);
3780 	if (r) {
3781 		radeon_ring_unlock_undo(rdev, ring);
3782 		radeon_semaphore_free(rdev, &sem, NULL);
3783 		return r;
3784 	}
3785 
3786 	radeon_ring_unlock_commit(rdev, ring);
3787 	radeon_semaphore_free(rdev, &sem, *fence);
3788 
3789 	return r;
3790 }
3791 
3792 /*
3793  * IB stuff
3794  */
3795 /**
3796  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3797  *
3798  * @rdev: radeon_device pointer
3799  * @ib: radeon indirect buffer object
3800  *
3801  * Emits a DE (drawing engine) or CE (constant engine) IB
3802  * on the gfx ring.  IBs are usually generated by userspace
3803  * acceleration drivers and submitted to the kernel for
3804  * scheduling on the ring.  This function schedules the IB
3805  * on the gfx ring for execution by the GPU.
3806  */
3807 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3808 {
3809 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3810 	u32 header, control = INDIRECT_BUFFER_VALID;
3811 
3812 	if (ib->is_const_ib) {
3813 		/* set switch buffer packet before const IB */
3814 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3815 		radeon_ring_write(ring, 0);
3816 
3817 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3818 	} else {
3819 		u32 next_rptr;
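		/*
		 * next_rptr points just past this submission: wptr plus the
		 * rptr-save packet emitted here (3 or 5 dwords) plus the
		 * 4-dword INDIRECT_BUFFER packet emitted below.
		 */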
3820 		if (ring->rptr_save_reg) {
3821 			next_rptr = ring->wptr + 3 + 4;
3822 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3823 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3824 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3825 			radeon_ring_write(ring, next_rptr);
3826 		} else if (rdev->wb.enabled) {
3827 			next_rptr = ring->wptr + 5 + 4;
3828 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3829 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3830 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3831 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3832 			radeon_ring_write(ring, next_rptr);
3833 		}
3834 
3835 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3836 	}
3837 
3838 	control |= ib->length_dw |
3839 		(ib->vm ? (ib->vm->id << 24) : 0);
3840 
3841 	radeon_ring_write(ring, header);
3842 	radeon_ring_write(ring,
3843 #ifdef __BIG_ENDIAN
3844 			  (2 << 0) |
3845 #endif
3846 			  (ib->gpu_addr & 0xFFFFFFFC));
3847 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3848 	radeon_ring_write(ring, control);
3849 }
3850 
3851 /**
3852  * cik_ib_test - basic gfx ring IB test
3853  *
3854  * @rdev: radeon_device pointer
3855  * @ring: radeon_ring structure holding ring information
3856  *
3857  * Allocate an IB and execute it on the gfx ring (CIK).
3858  * Provides a basic gfx ring test to verify that IBs are working.
3859  * Returns 0 on success, error on failure.
3860  */
3861 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3862 {
3863 	struct radeon_ib ib;
3864 	uint32_t scratch;
3865 	uint32_t tmp = 0;
3866 	unsigned i;
3867 	int r;
3868 
3869 	r = radeon_scratch_get(rdev, &scratch);
3870 	if (r) {
3871 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3872 		return r;
3873 	}
3874 	WREG32(scratch, 0xCAFEDEAD);
3875 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3876 	if (r) {
3877 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3878 		radeon_scratch_free(rdev, scratch);
3879 		return r;
3880 	}
3881 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3882 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3883 	ib.ptr[2] = 0xDEADBEEF;
3884 	ib.length_dw = 3;
3885 	r = radeon_ib_schedule(rdev, &ib, NULL);
3886 	if (r) {
3887 		radeon_scratch_free(rdev, scratch);
3888 		radeon_ib_free(rdev, &ib);
3889 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3890 		return r;
3891 	}
3892 	r = radeon_fence_wait(ib.fence, false);
3893 	if (r) {
3894 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3895 		radeon_scratch_free(rdev, scratch);
3896 		radeon_ib_free(rdev, &ib);
3897 		return r;
3898 	}
3899 	for (i = 0; i < rdev->usec_timeout; i++) {
3900 		tmp = RREG32(scratch);
3901 		if (tmp == 0xDEADBEEF)
3902 			break;
3903 		DRM_UDELAY(1);
3904 	}
3905 	if (i < rdev->usec_timeout) {
3906 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3907 	} else {
3908 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3909 			  scratch, tmp);
3910 		r = -EINVAL;
3911 	}
3912 	radeon_scratch_free(rdev, scratch);
3913 	radeon_ib_free(rdev, &ib);
3914 	return r;
3915 }
3916 
3917 /*
3918  * CP.
3919  * On CIK, gfx and compute now have independent command processors.
3920  *
3921  * GFX
3922  * Gfx consists of a single ring and can process both gfx jobs and
3923  * compute jobs.  The gfx CP consists of three microengines (ME):
3924  * PFP - Pre-Fetch Parser
3925  * ME - Micro Engine
3926  * CE - Constant Engine
3927  * The PFP and ME make up what is considered the Drawing Engine (DE).
3928  * The CE is an asynchronous engine used for updating buffer descriptors
3929  * used by the DE so that they can be loaded into cache in parallel
3930  * while the DE is processing state update packets.
3931  *
3932  * Compute
3933  * The compute CP consists of two microengines (ME):
3934  * MEC1 - Compute MicroEngine 1
3935  * MEC2 - Compute MicroEngine 2
3936  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3937  * The queues are exposed to userspace and are programmed directly
3938  * by the compute runtime.
3939  */
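/*
 * Illustrative decode of a flat queue index q (an assumption mirroring
 * the per-pipe mapping in cik_cp_compute_resume(), not a driver
 * interface):
 *
 *	me    = 1 + q / (4 * 8);  (MEC1 serves the first 32 queues)
 *	pipe  = (q / 8) & 3;
 *	queue = q & 7;
 *
 * giving 64 queues total on KV (2 MECs) and 32 on CI/KB (1 MEC).
 */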
3940 /**
3941  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3942  *
3943  * @rdev: radeon_device pointer
3944  * @enable: enable or disable the MEs
3945  *
3946  * Halts or unhalts the gfx MEs.
3947  */
3948 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3949 {
3950 	if (enable)
3951 		WREG32(CP_ME_CNTL, 0);
3952 	else {
3953 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3954 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3955 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3956 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3957 	}
3958 	udelay(50);
3959 }
3960 
3961 /**
3962  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3963  *
3964  * @rdev: radeon_device pointer
3965  *
3966  * Loads the gfx PFP, ME, and CE ucode.
3967  * Returns 0 for success, -EINVAL if the ucode is not available.
3968  */
3969 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3970 {
3971 	const __be32 *fw_data;
3972 	int i;
3973 
3974 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3975 		return -EINVAL;
3976 
3977 	cik_cp_gfx_enable(rdev, false);
3978 
3979 	/* PFP */
3980 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3981 	WREG32(CP_PFP_UCODE_ADDR, 0);
3982 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3983 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3984 	WREG32(CP_PFP_UCODE_ADDR, 0);
3985 
3986 	/* CE */
3987 	fw_data = (const __be32 *)rdev->ce_fw->data;
3988 	WREG32(CP_CE_UCODE_ADDR, 0);
3989 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3990 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3991 	WREG32(CP_CE_UCODE_ADDR, 0);
3992 
3993 	/* ME */
3994 	fw_data = (const __be32 *)rdev->me_fw->data;
3995 	WREG32(CP_ME_RAM_WADDR, 0);
3996 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3997 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3998 	WREG32(CP_ME_RAM_WADDR, 0);
3999 
4000 	WREG32(CP_PFP_UCODE_ADDR, 0);
4001 	WREG32(CP_CE_UCODE_ADDR, 0);
4002 	WREG32(CP_ME_RAM_WADDR, 0);
4003 	WREG32(CP_ME_RAM_RADDR, 0);
4004 	return 0;
4005 }
4006 
4007 /**
4008  * cik_cp_gfx_start - start the gfx ring
4009  *
4010  * @rdev: radeon_device pointer
4011  *
4012  * Enables the ring and loads the clear state context and other
4013  * packets required to init the ring.
4014  * Returns 0 for success, error for failure.
4015  */
4016 static int cik_cp_gfx_start(struct radeon_device *rdev)
4017 {
4018 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4019 	int r, i;
4020 
4021 	/* init the CP */
4022 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4023 	WREG32(CP_ENDIAN_SWAP, 0);
4024 	WREG32(CP_DEVICE_ID, 1);
4025 
4026 	cik_cp_gfx_enable(rdev, true);
4027 
4028 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4029 	if (r) {
4030 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4031 		return r;
4032 	}
4033 
4034 	/* init the CE partitions.  CE only used for gfx on CIK */
4035 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4036 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4037 	radeon_ring_write(ring, 0xc000);
4038 	radeon_ring_write(ring, 0xc000);
4039 
4040 	/* setup clear context state */
4041 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4042 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4043 
4044 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4045 	radeon_ring_write(ring, 0x80000000);
4046 	radeon_ring_write(ring, 0x80000000);
4047 
4048 	for (i = 0; i < cik_default_size; i++)
4049 		radeon_ring_write(ring, cik_default_state[i]);
4050 
4051 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4052 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4053 
4054 	/* set clear context state */
4055 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4056 	radeon_ring_write(ring, 0);
4057 
4058 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4059 	radeon_ring_write(ring, 0x00000316);
4060 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4061 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4062 
4063 	radeon_ring_unlock_commit(rdev, ring);
4064 
4065 	return 0;
4066 }
4067 
4068 /**
4069  * cik_cp_gfx_fini - stop the gfx ring
4070  *
4071  * @rdev: radeon_device pointer
4072  *
4073  * Stop the gfx ring and tear down the driver ring
4074  * info.
4075  */
4076 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4077 {
4078 	cik_cp_gfx_enable(rdev, false);
4079 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4080 }
4081 
4082 /**
4083  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4084  *
4085  * @rdev: radeon_device pointer
4086  *
4087  * Program the location and size of the gfx ring buffer
4088  * and test it to make sure it's working.
4089  * Returns 0 for success, error for failure.
4090  */
4091 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4092 {
4093 	struct radeon_ring *ring;
4094 	u32 tmp;
4095 	u32 rb_bufsz;
4096 	u64 rb_addr;
4097 	int r;
4098 
4099 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4100 	if (rdev->family != CHIP_HAWAII)
4101 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4102 
4103 	/* Set the write pointer delay */
4104 	WREG32(CP_RB_WPTR_DELAY, 0);
4105 
4106 	/* set the RB to use vmid 0 */
4107 	WREG32(CP_RB_VMID, 0);
4108 
4109 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4110 
4111 	/* ring 0 - compute and gfx */
4112 	/* Set ring buffer size */
4113 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4114 	rb_bufsz = order_base_2(ring->ring_size / 8);
4115 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
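	/*
	 * Both CP_RB0_CNTL size fields are log2 values counted in 8-byte
	 * quadwords: rb_bufsz for the ring itself, and the field at shift 8
	 * for the rptr write-back block size (one GPU page here).
	 */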
4116 #ifdef __BIG_ENDIAN
4117 	tmp |= BUF_SWAP_32BIT;
4118 #endif
4119 	WREG32(CP_RB0_CNTL, tmp);
4120 
4121 	/* Initialize the ring buffer's read and write pointers */
4122 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4123 	ring->wptr = 0;
4124 	WREG32(CP_RB0_WPTR, ring->wptr);
4125 
4126 	/* set the wb address whether it's enabled or not */
4127 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4128 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4129 
4130 	/* scratch register shadowing is no longer supported */
4131 	WREG32(SCRATCH_UMSK, 0);
4132 
4133 	if (!rdev->wb.enabled)
4134 		tmp |= RB_NO_UPDATE;
4135 
4136 	mdelay(1);
4137 	WREG32(CP_RB0_CNTL, tmp);
4138 
4139 	rb_addr = ring->gpu_addr >> 8;
4140 	WREG32(CP_RB0_BASE, rb_addr);
4141 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4142 
4143 	/* start the ring */
4144 	cik_cp_gfx_start(rdev);
4145 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4146 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4147 	if (r) {
4148 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4149 		return r;
4150 	}
4151 
4152 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4153 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4154 
4155 	return 0;
4156 }
4157 
4158 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4159 		     struct radeon_ring *ring)
4160 {
4161 	u32 rptr;
4162 
4163 	if (rdev->wb.enabled)
4164 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4165 	else
4166 		rptr = RREG32(CP_RB0_RPTR);
4167 
4168 	return rptr;
4169 }
4170 
4171 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4172 		     struct radeon_ring *ring)
4173 {
4174 	u32 wptr;
4175 
4176 	wptr = RREG32(CP_RB0_WPTR);
4177 
4178 	return wptr;
4179 }
4180 
4181 void cik_gfx_set_wptr(struct radeon_device *rdev,
4182 		      struct radeon_ring *ring)
4183 {
4184 	WREG32(CP_RB0_WPTR, ring->wptr);
4185 	(void)RREG32(CP_RB0_WPTR);
4186 }
4187 
4188 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4189 			 struct radeon_ring *ring)
4190 {
4191 	u32 rptr;
4192 
4193 	if (rdev->wb.enabled) {
4194 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4195 	} else {
4196 		mutex_lock(&rdev->srbm_mutex);
4197 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4198 		rptr = RREG32(CP_HQD_PQ_RPTR);
4199 		cik_srbm_select(rdev, 0, 0, 0, 0);
4200 		mutex_unlock(&rdev->srbm_mutex);
4201 	}
4202 
4203 	return rptr;
4204 }
4205 
4206 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4207 			 struct radeon_ring *ring)
4208 {
4209 	u32 wptr;
4210 
4211 	if (rdev->wb.enabled) {
4212 		/* XXX check if swapping is necessary on BE */
4213 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4214 	} else {
4215 		mutex_lock(&rdev->srbm_mutex);
4216 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4217 		wptr = RREG32(CP_HQD_PQ_WPTR);
4218 		cik_srbm_select(rdev, 0, 0, 0, 0);
4219 		mutex_unlock(&rdev->srbm_mutex);
4220 	}
4221 
4222 	return wptr;
4223 }
4224 
4225 void cik_compute_set_wptr(struct radeon_device *rdev,
4226 			  struct radeon_ring *ring)
4227 {
4228 	/* XXX check if swapping is necessary on BE */
4229 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4230 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4231 }
4232 
4233 /**
4234  * cik_cp_compute_enable - enable/disable the compute CP MEs
4235  *
4236  * @rdev: radeon_device pointer
4237  * @enable: enable or disable the MEs
4238  *
4239  * Halts or unhalts the compute MEs.
4240  */
4241 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4242 {
4243 	if (enable)
4244 		WREG32(CP_MEC_CNTL, 0);
4245 	else {
4246 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4247 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4248 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4249 	}
4250 	udelay(50);
4251 }
4252 
4253 /**
4254  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4255  *
4256  * @rdev: radeon_device pointer
4257  *
4258  * Loads the compute MEC1&2 ucode.
4259  * Returns 0 for success, -EINVAL if the ucode is not available.
4260  */
4261 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4262 {
4263 	const __be32 *fw_data;
4264 	int i;
4265 
4266 	if (!rdev->mec_fw)
4267 		return -EINVAL;
4268 
4269 	cik_cp_compute_enable(rdev, false);
4270 
4271 	/* MEC1 */
4272 	fw_data = (const __be32 *)rdev->mec_fw->data;
4273 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4274 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4275 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4276 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4277 
4278 	if (rdev->family == CHIP_KAVERI) {
4279 		/* MEC2 */
4280 		fw_data = (const __be32 *)rdev->mec_fw->data;
4281 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4282 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4283 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4284 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4285 	}
4286 
4287 	return 0;
4288 }
4289 
4290 /**
4291  * cik_cp_compute_start - start the compute queues
4292  *
4293  * @rdev: radeon_device pointer
4294  *
4295  * Enable the compute queues.
4296  * Returns 0 for success, error for failure.
4297  */
4298 static int cik_cp_compute_start(struct radeon_device *rdev)
4299 {
4300 	cik_cp_compute_enable(rdev, true);
4301 
4302 	return 0;
4303 }
4304 
4305 /**
4306  * cik_cp_compute_fini - stop the compute queues
4307  *
4308  * @rdev: radeon_device pointer
4309  *
4310  * Stop the compute queues and tear down the driver queue
4311  * info.
4312  */
4313 static void cik_cp_compute_fini(struct radeon_device *rdev)
4314 {
4315 	int i, idx, r;
4316 
4317 	cik_cp_compute_enable(rdev, false);
4318 
4319 	for (i = 0; i < 2; i++) {
4320 		if (i == 0)
4321 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4322 		else
4323 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4324 
4325 		if (rdev->ring[idx].mqd_obj) {
4326 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4327 			if (unlikely(r != 0))
4328 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4329 
4330 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4331 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4332 
4333 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4334 			rdev->ring[idx].mqd_obj = NULL;
4335 		}
4336 	}
4337 }
4338 
4339 static void cik_mec_fini(struct radeon_device *rdev)
4340 {
4341 	int r;
4342 
4343 	if (rdev->mec.hpd_eop_obj) {
4344 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4345 		if (unlikely(r != 0))
4346 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4347 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4348 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4349 
4350 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4351 		rdev->mec.hpd_eop_obj = NULL;
4352 	}
4353 }
4354 
4355 #define MEC_HPD_SIZE 2048
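/*
 * Each compute pipe owns a MEC_HPD_SIZE * 2 byte slice of the HPD EOP
 * buffer allocated below; cik_cp_compute_resume() points each pipe's
 * CP_HPD_EOP_BASE_ADDR at its slice.
 */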
4356 
4357 static int cik_mec_init(struct radeon_device *rdev)
4358 {
4359 	int r;
4360 	u32 *hpd;
4361 
4362 	/*
4363 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4364 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4365 	 */
4366 	if (rdev->family == CHIP_KAVERI)
4367 		rdev->mec.num_mec = 2;
4368 	else
4369 		rdev->mec.num_mec = 1;
4370 	rdev->mec.num_pipe = 4;
4371 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4372 
4373 	if (rdev->mec.hpd_eop_obj == NULL) {
4374 		r = radeon_bo_create(rdev,
4375 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4376 				     PAGE_SIZE, true,
4377 				     RADEON_GEM_DOMAIN_GTT, NULL,
4378 				     &rdev->mec.hpd_eop_obj);
4379 		if (r) {
4380 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4381 			return r;
4382 		}
4383 	}
4384 
4385 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4386 	if (unlikely(r != 0)) {
4387 		cik_mec_fini(rdev);
4388 		return r;
4389 	}
4390 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4391 			  &rdev->mec.hpd_eop_gpu_addr);
4392 	if (r) {
4393 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4394 		cik_mec_fini(rdev);
4395 		return r;
4396 	}
4397 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4398 	if (r) {
4399 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4400 		cik_mec_fini(rdev);
4401 		return r;
4402 	}
4403 
4404 	/* clear memory.  Not sure if this is required or not */
4405 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4406 
4407 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4408 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4409 
4410 	return 0;
4411 }
4412 
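/*
 * Mirror of the CP_HQD_* register file as it is laid out inside the MQD
 * (memory queue descriptor).  The driver fills these fields and then
 * copies them into the real registers while the queue's me/pipe/queue
 * is selected via SRBM.
 */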
4413 struct hqd_registers
4414 {
4415 	u32 cp_mqd_base_addr;
4416 	u32 cp_mqd_base_addr_hi;
4417 	u32 cp_hqd_active;
4418 	u32 cp_hqd_vmid;
4419 	u32 cp_hqd_persistent_state;
4420 	u32 cp_hqd_pipe_priority;
4421 	u32 cp_hqd_queue_priority;
4422 	u32 cp_hqd_quantum;
4423 	u32 cp_hqd_pq_base;
4424 	u32 cp_hqd_pq_base_hi;
4425 	u32 cp_hqd_pq_rptr;
4426 	u32 cp_hqd_pq_rptr_report_addr;
4427 	u32 cp_hqd_pq_rptr_report_addr_hi;
4428 	u32 cp_hqd_pq_wptr_poll_addr;
4429 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4430 	u32 cp_hqd_pq_doorbell_control;
4431 	u32 cp_hqd_pq_wptr;
4432 	u32 cp_hqd_pq_control;
4433 	u32 cp_hqd_ib_base_addr;
4434 	u32 cp_hqd_ib_base_addr_hi;
4435 	u32 cp_hqd_ib_rptr;
4436 	u32 cp_hqd_ib_control;
4437 	u32 cp_hqd_iq_timer;
4438 	u32 cp_hqd_iq_rptr;
4439 	u32 cp_hqd_dequeue_request;
4440 	u32 cp_hqd_dma_offload;
4441 	u32 cp_hqd_sema_cmd;
4442 	u32 cp_hqd_msg_type;
4443 	u32 cp_hqd_atomic0_preop_lo;
4444 	u32 cp_hqd_atomic0_preop_hi;
4445 	u32 cp_hqd_atomic1_preop_lo;
4446 	u32 cp_hqd_atomic1_preop_hi;
4447 	u32 cp_hqd_hq_scheduler0;
4448 	u32 cp_hqd_hq_scheduler1;
4449 	u32 cp_mqd_control;
4450 };
4451 
4452 struct bonaire_mqd
4453 {
4454 	u32 header;
4455 	u32 dispatch_initiator;
4456 	u32 dimensions[3];
4457 	u32 start_idx[3];
4458 	u32 num_threads[3];
4459 	u32 pipeline_stat_enable;
4460 	u32 perf_counter_enable;
4461 	u32 pgm[2];
4462 	u32 tba[2];
4463 	u32 tma[2];
4464 	u32 pgm_rsrc[2];
4465 	u32 vmid;
4466 	u32 resource_limits;
4467 	u32 static_thread_mgmt01[2];
4468 	u32 tmp_ring_size;
4469 	u32 static_thread_mgmt23[2];
4470 	u32 restart[3];
4471 	u32 thread_trace_enable;
4472 	u32 reserved1;
4473 	u32 user_data[16];
4474 	u32 vgtcs_invoke_count[2];
4475 	struct hqd_registers queue_state;
4476 	u32 dequeue_cntr;
4477 	u32 interrupt_queue[64];
4478 };
4479 
4480 /**
4481  * cik_cp_compute_resume - setup the compute queue registers
4482  *
4483  * @rdev: radeon_device pointer
4484  *
4485  * Program the compute queues and test them to make sure they
4486  * are working.
4487  * Returns 0 for success, error for failure.
4488  */
4489 static int cik_cp_compute_resume(struct radeon_device *rdev)
4490 {
4491 	int r, i, j, idx;
4492 	u32 tmp;
4493 	bool use_doorbell = true;
4494 	u64 hqd_gpu_addr;
4495 	u64 mqd_gpu_addr;
4496 	u64 eop_gpu_addr;
4497 	u64 wb_gpu_addr;
4498 	u32 *buf;
4499 	struct bonaire_mqd *mqd;
4500 
4501 	r = cik_cp_compute_start(rdev);
4502 	if (r)
4503 		return r;
4504 
4505 	/* fix up chicken bits */
4506 	tmp = RREG32(CP_CPF_DEBUG);
4507 	tmp |= (1 << 23);
4508 	WREG32(CP_CPF_DEBUG, tmp);
4509 
4510 	/* init the pipes */
4511 	mutex_lock(&rdev->srbm_mutex);
4512 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4513 		int me = (i < 4) ? 1 : 2;
4514 		int pipe = (i < 4) ? i : (i - 4);
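		/* flat index i covers ME1 pipes 0-3 first, then ME2 pipes 0-3 */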
4515 
4516 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4517 
4518 		cik_srbm_select(rdev, me, pipe, 0, 0);
4519 
4520 		/* write the EOP addr */
4521 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4522 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4523 
4524 		/* set the VMID assigned */
4525 		WREG32(CP_HPD_EOP_VMID, 0);
4526 
4527 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4528 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4529 		tmp &= ~EOP_SIZE_MASK;
4530 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
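		/*
		 * Encoding check: MEC_HPD_SIZE / 8 = 256, so order_base_2()
		 * yields 8 and the hardware sees 2^(8+1) = 512 dwords,
		 * i.e. 2048 bytes = MEC_HPD_SIZE.
		 */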
4531 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4532 	}
4533 	cik_srbm_select(rdev, 0, 0, 0, 0);
4534 	mutex_unlock(&rdev->srbm_mutex);
4535 
4536 	/* init the queues.  Just two for now. */
4537 	for (i = 0; i < 2; i++) {
4538 		if (i == 0)
4539 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4540 		else
4541 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4542 
4543 		if (rdev->ring[idx].mqd_obj == NULL) {
4544 			r = radeon_bo_create(rdev,
4545 					     sizeof(struct bonaire_mqd),
4546 					     PAGE_SIZE, true,
4547 					     RADEON_GEM_DOMAIN_GTT, NULL,
4548 					     &rdev->ring[idx].mqd_obj);
4549 			if (r) {
4550 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4551 				return r;
4552 			}
4553 		}
4554 
4555 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4556 		if (unlikely(r != 0)) {
4557 			cik_cp_compute_fini(rdev);
4558 			return r;
4559 		}
4560 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4561 				  &mqd_gpu_addr);
4562 		if (r) {
4563 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4564 			cik_cp_compute_fini(rdev);
4565 			return r;
4566 		}
4567 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4568 		if (r) {
4569 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4570 			cik_cp_compute_fini(rdev);
4571 			return r;
4572 		}
4573 
4574 		/* init the mqd struct */
4575 		memset(buf, 0, sizeof(struct bonaire_mqd));
4576 
4577 		mqd = (struct bonaire_mqd *)buf;
4578 		mqd->header = 0xC0310800;
4579 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4580 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4581 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4582 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4583 
4584 		mutex_lock(&rdev->srbm_mutex);
4585 		cik_srbm_select(rdev, rdev->ring[idx].me,
4586 				rdev->ring[idx].pipe,
4587 				rdev->ring[idx].queue, 0);
4588 
4589 		/* disable wptr polling */
4590 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4591 		tmp &= ~WPTR_POLL_EN;
4592 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4593 
4594 		/* enable doorbell? */
4595 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4596 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4597 		if (use_doorbell)
4598 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4599 		else
4600 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4601 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4602 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4603 
4604 		/* disable the queue if it's active */
4605 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4606 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4607 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4608 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4609 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4610 			for (j = 0; j < rdev->usec_timeout; j++) {
4611 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4612 					break;
4613 				udelay(1);
4614 			}
4615 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4616 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4617 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4618 		}
4619 
4620 		/* set the pointer to the MQD */
4621 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4622 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4623 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4624 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4625 		/* set MQD vmid to 0 */
4626 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4627 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4628 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4629 
4630 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4631 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4632 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4633 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4634 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4635 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4636 
4637 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4638 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4639 		mqd->queue_state.cp_hqd_pq_control &=
4640 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4641 
4642 		mqd->queue_state.cp_hqd_pq_control |=
4643 			order_base_2(rdev->ring[idx].ring_size / 8);
4644 		mqd->queue_state.cp_hqd_pq_control |=
4645 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4646 #ifdef __BIG_ENDIAN
4647 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4648 #endif
4649 		mqd->queue_state.cp_hqd_pq_control &=
4650 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4651 		mqd->queue_state.cp_hqd_pq_control |=
4652 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4653 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4654 
4655 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4656 		if (i == 0)
4657 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4658 		else
4659 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4660 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4661 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4662 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4663 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4664 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4665 
4666 		/* set the wb address whether it's enabled or not */
4667 		if (i == 0)
4668 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4669 		else
4670 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4671 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4672 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4673 			upper_32_bits(wb_gpu_addr) & 0xffff;
4674 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4675 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4676 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4677 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4678 
4679 		/* enable the doorbell if requested */
4680 		if (use_doorbell) {
4681 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4682 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4683 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4684 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4685 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4686 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4687 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4688 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4689 
4690 		} else {
4691 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4692 		}
4693 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4694 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4695 
4696 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4697 		rdev->ring[idx].wptr = 0;
4698 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4699 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4700 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4701 
4702 		/* set the vmid for the queue */
4703 		mqd->queue_state.cp_hqd_vmid = 0;
4704 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4705 
4706 		/* activate the queue */
4707 		mqd->queue_state.cp_hqd_active = 1;
4708 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4709 
4710 		cik_srbm_select(rdev, 0, 0, 0, 0);
4711 		mutex_unlock(&rdev->srbm_mutex);
4712 
4713 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4714 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4715 
4716 		rdev->ring[idx].ready = true;
4717 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4718 		if (r)
4719 			rdev->ring[idx].ready = false;
4720 	}
4721 
4722 	return 0;
4723 }
4724 
4725 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4726 {
4727 	cik_cp_gfx_enable(rdev, enable);
4728 	cik_cp_compute_enable(rdev, enable);
4729 }
4730 
4731 static int cik_cp_load_microcode(struct radeon_device *rdev)
4732 {
4733 	int r;
4734 
4735 	r = cik_cp_gfx_load_microcode(rdev);
4736 	if (r)
4737 		return r;
4738 	r = cik_cp_compute_load_microcode(rdev);
4739 	if (r)
4740 		return r;
4741 
4742 	return 0;
4743 }
4744 
4745 static void cik_cp_fini(struct radeon_device *rdev)
4746 {
4747 	cik_cp_gfx_fini(rdev);
4748 	cik_cp_compute_fini(rdev);
4749 }
4750 
4751 static int cik_cp_resume(struct radeon_device *rdev)
4752 {
4753 	int r;
4754 
4755 	cik_enable_gui_idle_interrupt(rdev, false);
4756 
4757 	r = cik_cp_load_microcode(rdev);
4758 	if (r)
4759 		return r;
4760 
4761 	r = cik_cp_gfx_resume(rdev);
4762 	if (r)
4763 		return r;
4764 	r = cik_cp_compute_resume(rdev);
4765 	if (r)
4766 		return r;
4767 
4768 	cik_enable_gui_idle_interrupt(rdev, true);
4769 
4770 	return 0;
4771 }
4772 
4773 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4774 {
4775 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4776 		RREG32(GRBM_STATUS));
4777 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4778 		RREG32(GRBM_STATUS2));
4779 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4780 		RREG32(GRBM_STATUS_SE0));
4781 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4782 		RREG32(GRBM_STATUS_SE1));
4783 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4784 		RREG32(GRBM_STATUS_SE2));
4785 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4786 		RREG32(GRBM_STATUS_SE3));
4787 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4788 		RREG32(SRBM_STATUS));
4789 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4790 		RREG32(SRBM_STATUS2));
4791 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4792 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4793 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4794 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4795 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4796 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4797 		 RREG32(CP_STALLED_STAT1));
4798 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4799 		 RREG32(CP_STALLED_STAT2));
4800 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4801 		 RREG32(CP_STALLED_STAT3));
4802 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4803 		 RREG32(CP_CPF_BUSY_STAT));
4804 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4805 		 RREG32(CP_CPF_STALLED_STAT1));
4806 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4807 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4808 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4809 		 RREG32(CP_CPC_STALLED_STAT1));
4810 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4811 }
4812 
4813 /**
4814  * cik_gpu_check_soft_reset - check which blocks are busy
4815  *
4816  * @rdev: radeon_device pointer
4817  *
4818  * Check which blocks are busy and return the relevant reset
4819  * mask to be used by cik_gpu_soft_reset().
4820  * Returns a mask of the blocks to be reset.
4821  */
4822 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4823 {
4824 	u32 reset_mask = 0;
4825 	u32 tmp;
4826 
4827 	/* GRBM_STATUS */
4828 	tmp = RREG32(GRBM_STATUS);
4829 	if (tmp & (PA_BUSY | SC_BUSY |
4830 		   BCI_BUSY | SX_BUSY |
4831 		   TA_BUSY | VGT_BUSY |
4832 		   DB_BUSY | CB_BUSY |
4833 		   GDS_BUSY | SPI_BUSY |
4834 		   IA_BUSY | IA_BUSY_NO_DMA))
4835 		reset_mask |= RADEON_RESET_GFX;
4836 
4837 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4838 		reset_mask |= RADEON_RESET_CP;
4839 
4840 	/* GRBM_STATUS2 */
4841 	tmp = RREG32(GRBM_STATUS2);
4842 	if (tmp & RLC_BUSY)
4843 		reset_mask |= RADEON_RESET_RLC;
4844 
4845 	/* SDMA0_STATUS_REG */
4846 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4847 	if (!(tmp & SDMA_IDLE))
4848 		reset_mask |= RADEON_RESET_DMA;
4849 
4850 	/* SDMA1_STATUS_REG */
4851 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4852 	if (!(tmp & SDMA_IDLE))
4853 		reset_mask |= RADEON_RESET_DMA1;
4854 
4855 	/* SRBM_STATUS2 */
4856 	tmp = RREG32(SRBM_STATUS2);
4857 	if (tmp & SDMA_BUSY)
4858 		reset_mask |= RADEON_RESET_DMA;
4859 
4860 	if (tmp & SDMA1_BUSY)
4861 		reset_mask |= RADEON_RESET_DMA1;
4862 
4863 	/* SRBM_STATUS */
4864 	tmp = RREG32(SRBM_STATUS);
4865 
4866 	if (tmp & IH_BUSY)
4867 		reset_mask |= RADEON_RESET_IH;
4868 
4869 	if (tmp & SEM_BUSY)
4870 		reset_mask |= RADEON_RESET_SEM;
4871 
4872 	if (tmp & GRBM_RQ_PENDING)
4873 		reset_mask |= RADEON_RESET_GRBM;
4874 
4875 	if (tmp & VMC_BUSY)
4876 		reset_mask |= RADEON_RESET_VMC;
4877 
4878 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4879 		   MCC_BUSY | MCD_BUSY))
4880 		reset_mask |= RADEON_RESET_MC;
4881 
4882 	if (evergreen_is_display_hung(rdev))
4883 		reset_mask |= RADEON_RESET_DISPLAY;
4884 
4885 	/* Skip MC reset as it's most likely not hung, just busy */
4886 	if (reset_mask & RADEON_RESET_MC) {
4887 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4888 		reset_mask &= ~RADEON_RESET_MC;
4889 	}
4890 
4891 	return reset_mask;
4892 }
4893 
4894 /**
4895  * cik_gpu_soft_reset - soft reset GPU
4896  *
4897  * @rdev: radeon_device pointer
4898  * @reset_mask: mask of which blocks to reset
4899  *
4900  * Soft reset the blocks specified in @reset_mask.
4901  */
4902 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4903 {
4904 	struct evergreen_mc_save save;
4905 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4906 	u32 tmp;
4907 
4908 	if (reset_mask == 0)
4909 		return;
4910 
4911 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4912 
4913 	cik_print_gpu_status_regs(rdev);
4914 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4915 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4916 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4917 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4918 
4919 	/* disable CG/PG */
4920 	cik_fini_pg(rdev);
4921 	cik_fini_cg(rdev);
4922 
4923 	/* stop the rlc */
4924 	cik_rlc_stop(rdev);
4925 
4926 	/* Disable GFX parsing/prefetching */
4927 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4928 
4929 	/* Disable MEC parsing/prefetching */
4930 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4931 
4932 	if (reset_mask & RADEON_RESET_DMA) {
4933 		/* sdma0 */
4934 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4935 		tmp |= SDMA_HALT;
4936 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4937 	}
4938 	if (reset_mask & RADEON_RESET_DMA1) {
4939 		/* sdma1 */
4940 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4941 		tmp |= SDMA_HALT;
4942 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4943 	}
4944 
4945 	evergreen_mc_stop(rdev, &save);
4946 	if (evergreen_mc_wait_for_idle(rdev)) {
4947 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4948 	}
4949 
4950 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4951 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4952 
4953 	if (reset_mask & RADEON_RESET_CP) {
4954 		grbm_soft_reset |= SOFT_RESET_CP;
4955 
4956 		srbm_soft_reset |= SOFT_RESET_GRBM;
4957 	}
4958 
4959 	if (reset_mask & RADEON_RESET_DMA)
4960 		srbm_soft_reset |= SOFT_RESET_SDMA;
4961 
4962 	if (reset_mask & RADEON_RESET_DMA1)
4963 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4964 
4965 	if (reset_mask & RADEON_RESET_DISPLAY)
4966 		srbm_soft_reset |= SOFT_RESET_DC;
4967 
4968 	if (reset_mask & RADEON_RESET_RLC)
4969 		grbm_soft_reset |= SOFT_RESET_RLC;
4970 
4971 	if (reset_mask & RADEON_RESET_SEM)
4972 		srbm_soft_reset |= SOFT_RESET_SEM;
4973 
4974 	if (reset_mask & RADEON_RESET_IH)
4975 		srbm_soft_reset |= SOFT_RESET_IH;
4976 
4977 	if (reset_mask & RADEON_RESET_GRBM)
4978 		srbm_soft_reset |= SOFT_RESET_GRBM;
4979 
4980 	if (reset_mask & RADEON_RESET_VMC)
4981 		srbm_soft_reset |= SOFT_RESET_VMC;
4982 
4983 	if (!(rdev->flags & RADEON_IS_IGP)) {
4984 		if (reset_mask & RADEON_RESET_MC)
4985 			srbm_soft_reset |= SOFT_RESET_MC;
4986 	}
4987 
4988 	if (grbm_soft_reset) {
4989 		tmp = RREG32(GRBM_SOFT_RESET);
4990 		tmp |= grbm_soft_reset;
4991 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4992 		WREG32(GRBM_SOFT_RESET, tmp);
4993 		tmp = RREG32(GRBM_SOFT_RESET);
4994 
4995 		udelay(50);
4996 
4997 		tmp &= ~grbm_soft_reset;
4998 		WREG32(GRBM_SOFT_RESET, tmp);
4999 		tmp = RREG32(GRBM_SOFT_RESET);
5000 	}
5001 
5002 	if (srbm_soft_reset) {
5003 		tmp = RREG32(SRBM_SOFT_RESET);
5004 		tmp |= srbm_soft_reset;
5005 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5006 		WREG32(SRBM_SOFT_RESET, tmp);
5007 		tmp = RREG32(SRBM_SOFT_RESET);
5008 
5009 		udelay(50);
5010 
5011 		tmp &= ~srbm_soft_reset;
5012 		WREG32(SRBM_SOFT_RESET, tmp);
5013 		tmp = RREG32(SRBM_SOFT_RESET);
5014 	}
5015 
5016 	/* Wait a little for things to settle down */
5017 	udelay(50);
5018 
5019 	evergreen_mc_resume(rdev, &save);
5020 	udelay(50);
5021 
5022 	cik_print_gpu_status_regs(rdev);
5023 }
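
/*
 * Both reset banks above are pulsed the same way: set the reset bits,
 * read the register back to post the write, hold reset briefly, then
 * clear the bits and post again.  In sketch form:
 *
 *	tmp = RREG32(reg) | reset_bits;
 *	WREG32(reg, tmp);
 *	tmp = RREG32(reg);		(read back to post the write)
 *	udelay(50);			(hold reset)
 *	WREG32(reg, tmp & ~reset_bits);
 *	tmp = RREG32(reg);		(read back to post the write)
 */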
5024 
5025 struct kv_reset_save_regs {
5026 	u32 gmcon_reng_execute;
5027 	u32 gmcon_misc;
5028 	u32 gmcon_misc3;
5029 };
5030 
5031 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5032 				   struct kv_reset_save_regs *save)
5033 {
5034 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5035 	save->gmcon_misc = RREG32(GMCON_MISC);
5036 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5037 
5038 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5039 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5040 						STCTRL_STUTTER_EN));
5041 }
5042 
5043 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5044 				      struct kv_reset_save_regs *save)
5045 {
5046 	int i;
5047 
5048 	WREG32(GMCON_PGFSM_WRITE, 0);
5049 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5050 
5051 	for (i = 0; i < 5; i++)
5052 		WREG32(GMCON_PGFSM_WRITE, 0);
5053 
5054 	WREG32(GMCON_PGFSM_WRITE, 0);
5055 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5056 
5057 	for (i = 0; i < 5; i++)
5058 		WREG32(GMCON_PGFSM_WRITE, 0);
5059 
5060 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5061 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5062 
5063 	for (i = 0; i < 5; i++)
5064 		WREG32(GMCON_PGFSM_WRITE, 0);
5065 
5066 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5067 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5068 
5069 	for (i = 0; i < 5; i++)
5070 		WREG32(GMCON_PGFSM_WRITE, 0);
5071 
5072 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5073 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5074 
5075 	for (i = 0; i < 5; i++)
5076 		WREG32(GMCON_PGFSM_WRITE, 0);
5077 
5078 	WREG32(GMCON_PGFSM_WRITE, 0);
5079 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5080 
5081 	for (i = 0; i < 5; i++)
5082 		WREG32(GMCON_PGFSM_WRITE, 0);
5083 
5084 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5085 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5086 
5087 	for (i = 0; i < 5; i++)
5088 		WREG32(GMCON_PGFSM_WRITE, 0);
5089 
5090 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5091 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5092 
5093 	for (i = 0; i < 5; i++)
5094 		WREG32(GMCON_PGFSM_WRITE, 0);
5095 
5096 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5097 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5098 
5099 	for (i = 0; i < 5; i++)
5100 		WREG32(GMCON_PGFSM_WRITE, 0);
5101 
5102 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5103 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5104 
5105 	for (i = 0; i < 5; i++)
5106 		WREG32(GMCON_PGFSM_WRITE, 0);
5107 
5108 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5109 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5110 
5111 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5112 	WREG32(GMCON_MISC, save->gmcon_misc);
5113 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5114 }
5115 
5116 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5117 {
5118 	struct evergreen_mc_save save;
5119 	struct kv_reset_save_regs kv_save = { 0 };
5120 	u32 tmp, i;
5121 
5122 	dev_info(rdev->dev, "GPU pci config reset\n");
5123 
5124 	/* disable dpm? */
5125 
5126 	/* disable cg/pg */
5127 	cik_fini_pg(rdev);
5128 	cik_fini_cg(rdev);
5129 
5130 	/* Disable GFX parsing/prefetching */
5131 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5132 
5133 	/* Disable MEC parsing/prefetching */
5134 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5135 
5136 	/* sdma0 */
5137 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5138 	tmp |= SDMA_HALT;
5139 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5140 	/* sdma1 */
5141 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5142 	tmp |= SDMA_HALT;
5143 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5144 	/* XXX other engines? */
5145 
5146 	/* halt the rlc, disable cp internal ints */
5147 	cik_rlc_stop(rdev);
5148 
5149 	udelay(50);
5150 
5151 	/* disable mem access */
5152 	evergreen_mc_stop(rdev, &save);
5153 	if (evergreen_mc_wait_for_idle(rdev)) {
5154 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5155 	}
5156 
5157 	if (rdev->flags & RADEON_IS_IGP)
5158 		kv_save_regs_for_reset(rdev, &kv_save);
5159 
5160 	/* disable BM */
5161 	pci_clear_master(rdev->pdev);
5162 	/* reset */
5163 	radeon_pci_config_reset(rdev);
5164 
5165 	udelay(100);
5166 
5167 	/* wait for asic to come out of reset */
5168 	for (i = 0; i < rdev->usec_timeout; i++) {
5169 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5170 			break;
5171 		udelay(1);
5172 	}
5173 
5174 	/* does asic init need to be run first??? */
5175 	if (rdev->flags & RADEON_IS_IGP)
5176 		kv_restore_regs_for_reset(rdev, &kv_save);
5177 }
5178 
5179 /**
5180  * cik_asic_reset - soft reset GPU
5181  *
5182  * @rdev: radeon_device pointer
5183  *
5184  * Look up which blocks are hung and attempt a soft reset;
5185  * fall back to a PCI config reset if blocks are still hung
 * and hard reset is enabled.
5186  * Returns 0 for success.
5187  */
5188 int cik_asic_reset(struct radeon_device *rdev)
5189 {
5190 	u32 reset_mask;
5191 
5192 	reset_mask = cik_gpu_check_soft_reset(rdev);
5193 
5194 	if (reset_mask)
5195 		r600_set_bios_scratch_engine_hung(rdev, true);
5196 
5197 	/* try soft reset */
5198 	cik_gpu_soft_reset(rdev, reset_mask);
5199 
5200 	reset_mask = cik_gpu_check_soft_reset(rdev);
5201 
5202 	/* try pci config reset */
5203 	if (reset_mask && radeon_hard_reset)
5204 		cik_gpu_pci_config_reset(rdev);
5205 
5206 	reset_mask = cik_gpu_check_soft_reset(rdev);
5207 
5208 	if (!reset_mask)
5209 		r600_set_bios_scratch_engine_hung(rdev, false);
5210 
5211 	return 0;
5212 }
5213 
5214 /**
5215  * cik_gfx_is_lockup - check if the 3D engine is locked up
5216  *
5217  * @rdev: radeon_device pointer
5218  * @ring: radeon_ring structure holding ring information
5219  *
5220  * Check if the 3D engine is locked up (CIK).
5221  * Returns true if the engine is locked up, false if not.
5222  */
5223 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5224 {
5225 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5226 
5227 	if (!(reset_mask & (RADEON_RESET_GFX |
5228 			    RADEON_RESET_COMPUTE |
5229 			    RADEON_RESET_CP))) {
5230 		radeon_ring_lockup_update(rdev, ring);
5231 		return false;
5232 	}
5233 	return radeon_ring_test_lockup(rdev, ring);
5234 }
5235 
5236 /* MC */
5237 /**
5238  * cik_mc_program - program the GPU memory controller
5239  *
5240  * @rdev: radeon_device pointer
5241  *
5242  * Set the location of vram, gart, and AGP in the GPU's
5243  * physical address space (CIK).
5244  */
5245 static void cik_mc_program(struct radeon_device *rdev)
5246 {
5247 	struct evergreen_mc_save save;
5248 	u32 tmp;
5249 	int i, j;
5250 
5251 	/* Initialize HDP */
5252 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5253 		WREG32((0x2c14 + j), 0x00000000);
5254 		WREG32((0x2c18 + j), 0x00000000);
5255 		WREG32((0x2c1c + j), 0x00000000);
5256 		WREG32((0x2c20 + j), 0x00000000);
5257 		WREG32((0x2c24 + j), 0x00000000);
5258 	}
5259 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5260 
5261 	evergreen_mc_stop(rdev, &save);
5262 	if (radeon_mc_wait_for_idle(rdev)) {
5263 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5264 	}
5265 	/* Lock out access through the VGA aperture */
5266 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5267 	/* Update configuration */
5268 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5269 	       rdev->mc.vram_start >> 12);
5270 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5271 	       rdev->mc.vram_end >> 12);
5272 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5273 	       rdev->vram_scratch.gpu_addr >> 12);
5274 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5275 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5276 	WREG32(MC_VM_FB_LOCATION, tmp);
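	/* e.g. with vram_start == 0 and vram_end == 0xFFFFFFFF (4GB - 1),
	 * tmp == 0x00FF0000: top and bottom of the FB aperture are packed
	 * as 16-bit fields in units of 16MB (address >> 24)
	 */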
5277 	/* XXX double check these! */
5278 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5279 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5280 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5281 	WREG32(MC_VM_AGP_BASE, 0);
5282 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5283 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5284 	if (radeon_mc_wait_for_idle(rdev)) {
5285 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5286 	}
5287 	evergreen_mc_resume(rdev, &save);
5288 	/* we need to own VRAM, so turn off the VGA renderer here
5289 	 * to stop it from overwriting our objects */
5290 	rv515_vga_render_disable(rdev);
5291 }
5292 
5293 /**
5294  * cik_mc_init - initialize the memory controller driver params
5295  *
5296  * @rdev: radeon_device pointer
5297  *
5298  * Look up the amount of vram, vram width, and decide how to place
5299  * vram and gart within the GPU's physical address space (CIK).
5300  * Returns 0 for success.
5301  */
5302 static int cik_mc_init(struct radeon_device *rdev)
5303 {
5304 	u32 tmp;
5305 	int chansize, numchan;
5306 
5307 	/* Get VRAM information */
5308 	rdev->mc.vram_is_ddr = true;
5309 	tmp = RREG32(MC_ARB_RAMCFG);
5310 	if (tmp & CHANSIZE_MASK) {
5311 		chansize = 64;
5312 	} else {
5313 		chansize = 32;
5314 	}
5315 	tmp = RREG32(MC_SHARED_CHMAP);
5316 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5317 	case 0:
5318 	default:
5319 		numchan = 1;
5320 		break;
5321 	case 1:
5322 		numchan = 2;
5323 		break;
5324 	case 2:
5325 		numchan = 4;
5326 		break;
5327 	case 3:
5328 		numchan = 8;
5329 		break;
5330 	case 4:
5331 		numchan = 3;
5332 		break;
5333 	case 5:
5334 		numchan = 6;
5335 		break;
5336 	case 6:
5337 		numchan = 10;
5338 		break;
5339 	case 7:
5340 		numchan = 12;
5341 		break;
5342 	case 8:
5343 		numchan = 16;
5344 		break;
5345 	}
5346 	rdev->mc.vram_width = numchan * chansize;
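	/* e.g. a NOOFCHAN field of 3 decodes to 8 channels; with 64-bit
	 * channels that gives a 512-bit effective VRAM width
	 */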
5347 	/* Could aper size report 0? */
5348 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5349 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5350 	/* size in MB on CIK */
5351 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5352 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5353 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5354 	si_vram_gtt_location(rdev, &rdev->mc);
5355 	radeon_update_bandwidth_info(rdev);
5356 
5357 	return 0;
5358 }
5359 
5360 /*
5361  * GART
5362  * VMID 0 is the physical GPU addresses as used by the kernel.
5363  * VMIDs 1-15 are used for userspace clients and are handled
5364  * by the radeon vm/hsa code.
5365  */
5366 /**
5367  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5368  *
5369  * @rdev: radeon_device pointer
5370  *
5371  * Flush the TLB for the VMID 0 page table (CIK).
5372  */
5373 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5374 {
5375 	/* flush hdp cache */
5376 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5377 
5378 	/* bits 0-15 are the VM contexts 0-15 */
5379 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5380 }
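
/*
 * Each bit of VM_INVALIDATE_REQUEST selects one VM context, so a single
 * context can be invalidated with, e.g.:
 *
 *	WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
 *
 * which is the same request cik_vm_flush() below emits through the CP.
 */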
5381 
5382 /**
5383  * cik_pcie_gart_enable - gart enable
5384  *
5385  * @rdev: radeon_device pointer
5386  *
5387  * This sets up the TLBs, programs the page tables for VMID0,
5388  * sets up the hw for VMIDs 1-15 which are allocated on
5389  * demand, and sets up the global locations for the LDS, GDS,
5390  * and GPUVM for FSA64 clients (CIK).
5391  * Returns 0 for success, errors for failure.
5392  */
5393 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5394 {
5395 	int r, i;
5396 
5397 	if (rdev->gart.robj == NULL) {
5398 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5399 		return -EINVAL;
5400 	}
5401 	r = radeon_gart_table_vram_pin(rdev);
5402 	if (r)
5403 		return r;
5404 	radeon_gart_restore(rdev);
5405 	/* Setup TLB control */
5406 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5407 	       (0xA << 7) |
5408 	       ENABLE_L1_TLB |
5409 	       ENABLE_L1_FRAGMENT_PROCESSING |
5410 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5411 	       ENABLE_ADVANCED_DRIVER_MODEL |
5412 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5413 	/* Setup L2 cache */
5414 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5415 	       ENABLE_L2_FRAGMENT_PROCESSING |
5416 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5417 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5418 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5419 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5420 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5421 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5422 	       BANK_SELECT(4) |
5423 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5424 	/* setup context0 */
5425 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5426 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5427 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5428 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5429 			(u32)(rdev->dummy_page.addr >> 12));
5430 	WREG32(VM_CONTEXT0_CNTL2, 0);
5431 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5432 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5433 
5434 	WREG32(0x15D4, 0);
5435 	WREG32(0x15D8, 0);
5436 	WREG32(0x15DC, 0);
5437 
5438 	/* empty context1-15 */
5439 	/* FIXME: start with 4GB; once we are using two-level page
5440 	 * tables, switch to the full VM size space
5441 	 */
5442 	/* set vm size, must be a multiple of 4 */
5443 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5444 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5445 	for (i = 1; i < 16; i++) {
5446 		if (i < 8)
5447 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5448 			       rdev->gart.table_addr >> 12);
5449 		else
5450 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5451 			       rdev->gart.table_addr >> 12);
5452 	}
5453 
5454 	/* enable context1-15 */
5455 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5456 	       (u32)(rdev->dummy_page.addr >> 12));
5457 	WREG32(VM_CONTEXT1_CNTL2, 4);
5458 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5459 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5460 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5461 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5462 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5463 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5464 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5465 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5466 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5467 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5468 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5469 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5470 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5471 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5472 
5473 	if (rdev->family == CHIP_KAVERI) {
5474 		u32 tmp = RREG32(CHUB_CONTROL);
5475 		tmp &= ~BYPASS_VM;
5476 		WREG32(CHUB_CONTROL, tmp);
5477 	}
5478 
5479 	/* XXX SH_MEM regs */
5480 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5481 	mutex_lock(&rdev->srbm_mutex);
5482 	for (i = 0; i < 16; i++) {
5483 		cik_srbm_select(rdev, 0, 0, 0, i);
5484 		/* CP and shaders */
5485 		WREG32(SH_MEM_CONFIG, 0);
5486 		WREG32(SH_MEM_APE1_BASE, 1);
5487 		WREG32(SH_MEM_APE1_LIMIT, 0);
5488 		WREG32(SH_MEM_BASES, 0);
5489 		/* SDMA GFX */
5490 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5491 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5492 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5493 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5494 		/* XXX SDMA RLC - todo */
5495 	}
5496 	cik_srbm_select(rdev, 0, 0, 0, 0);
5497 	mutex_unlock(&rdev->srbm_mutex);
5498 
5499 	cik_pcie_gart_tlb_flush(rdev);
5500 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5501 		 (unsigned)(rdev->mc.gtt_size >> 20),
5502 		 (unsigned long long)rdev->gart.table_addr);
5503 	rdev->gart.ready = true;
5504 	return 0;
5505 }
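
/*
 * Illustrative sketch, not part of the driver: the per-VMID page table
 * base registers are split across two banks, as programmed in
 * cik_pcie_gart_enable() above and mirrored in cik_vm_flush() below.
 * The helper name is hypothetical.
 */
static inline u32 cik_vmid_pt_base_reg(unsigned int vmid)
{
	/* VMIDs 0-7 sit in one register bank, 8-15 in another,
	 * with 4 bytes per entry
	 */
	if (vmid < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
	return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}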
5506 
5507 /**
5508  * cik_pcie_gart_disable - gart disable
5509  *
5510  * @rdev: radeon_device pointer
5511  *
5512  * This disables all VM page tables (CIK).
5513  */
5514 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5515 {
5516 	/* Disable all tables */
5517 	WREG32(VM_CONTEXT0_CNTL, 0);
5518 	WREG32(VM_CONTEXT1_CNTL, 0);
5519 	/* Setup TLB control */
5520 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5521 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5522 	/* Setup L2 cache */
5523 	WREG32(VM_L2_CNTL,
5524 	       ENABLE_L2_FRAGMENT_PROCESSING |
5525 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5526 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5527 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5528 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5529 	WREG32(VM_L2_CNTL2, 0);
5530 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5531 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5532 	radeon_gart_table_vram_unpin(rdev);
5533 }
5534 
5535 /**
5536  * cik_pcie_gart_fini - vm fini callback
5537  *
5538  * @rdev: radeon_device pointer
5539  *
5540  * Tears down the driver GART/VM setup (CIK).
5541  */
5542 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5543 {
5544 	cik_pcie_gart_disable(rdev);
5545 	radeon_gart_table_vram_free(rdev);
5546 	radeon_gart_fini(rdev);
5547 }
5548 
5549 /* vm parser */
5550 /**
5551  * cik_ib_parse - vm ib_parse callback
5552  *
5553  * @rdev: radeon_device pointer
5554  * @ib: indirect buffer pointer
5555  *
5556  * CIK uses hw IB checking so this is a nop (CIK).
5557  */
5558 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5559 {
5560 	return 0;
5561 }
5562 
5563 /*
5564  * vm
5565  * VMID 0 is the physical GPU addresses as used by the kernel.
5566  * VMIDs 1-15 are used for userspace clients and are handled
5567  * by the radeon vm/hsa code.
5568  */
5569 /**
5570  * cik_vm_init - cik vm init callback
5571  *
5572  * @rdev: radeon_device pointer
5573  *
5574  * Inits cik specific vm parameters (number of VMs, base of vram for
5575  * VMIDs 1-15) (CIK).
5576  * Returns 0 for success.
5577  */
5578 int cik_vm_init(struct radeon_device *rdev)
5579 {
5580 	/* number of VMs */
5581 	rdev->vm_manager.nvm = 16;
5582 	/* base offset of vram pages */
5583 	if (rdev->flags & RADEON_IS_IGP) {
5584 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5585 		tmp <<= 22;
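		/* MC_VM_FB_OFFSET is in units of 4MB (1 << 22), so this
		 * yields a byte address
		 */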
5586 		rdev->vm_manager.vram_base_offset = tmp;
5587 	} else
5588 		rdev->vm_manager.vram_base_offset = 0;
5589 
5590 	return 0;
5591 }
5592 
5593 /**
5594  * cik_vm_fini - cik vm fini callback
5595  *
5596  * @rdev: radeon_device pointer
5597  *
5598  * Tear down any asic specific VM setup (CIK).
5599  */
5600 void cik_vm_fini(struct radeon_device *rdev)
5601 {
5602 }
5603 
5604 /**
5605  * cik_vm_decode_fault - print human readable fault info
5606  *
5607  * @rdev: radeon_device pointer
5608  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5609  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: memory client id (packed ASCII client name)
5610  *
5611  * Print human readable fault information (CIK).
5612  */
5613 static void cik_vm_decode_fault(struct radeon_device *rdev,
5614 				u32 status, u32 addr, u32 mc_client)
5615 {
5616 	u32 mc_id;
5617 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5618 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5619 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5620 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
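	/* e.g. a (hypothetical) mc_client of 0x43423000 unpacks, high
	 * byte first, to the string "CB0"
	 */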
5621 
5622 	if (rdev->family == CHIP_HAWAII)
5623 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5624 	else
5625 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5626 
5627 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5628 	       protections, vmid, addr,
5629 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5630 	       block, mc_client, mc_id);
5631 }
5632 
5633 /**
5634  * cik_vm_flush - cik vm flush using the CP
5635  *
5636  * @rdev: radeon_device pointer
 * @ridx: ring index to flush on
 * @vm: radeon_vm pointer whose page table to use
5637  *
5638  * Update the page table base and flush the VM TLB
5639  * using the CP (CIK).
5640  */
5641 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5642 {
5643 	struct radeon_ring *ring = &rdev->ring[ridx];
5644 
5645 	if (vm == NULL)
5646 		return;
5647 
5648 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5649 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5650 				 WRITE_DATA_DST_SEL(0)));
5651 	if (vm->id < 8) {
5652 		radeon_ring_write(ring,
5653 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5654 	} else {
5655 		radeon_ring_write(ring,
5656 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5657 	}
5658 	radeon_ring_write(ring, 0);
5659 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5660 
5661 	/* update SH_MEM_* regs */
5662 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5663 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5664 				 WRITE_DATA_DST_SEL(0)));
5665 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5666 	radeon_ring_write(ring, 0);
5667 	radeon_ring_write(ring, VMID(vm->id));
5668 
5669 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5670 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5671 				 WRITE_DATA_DST_SEL(0)));
5672 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5673 	radeon_ring_write(ring, 0);
5674 
5675 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5676 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5677 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5678 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5679 
5680 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5681 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5682 				 WRITE_DATA_DST_SEL(0)));
5683 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5684 	radeon_ring_write(ring, 0);
5685 	radeon_ring_write(ring, VMID(0));
5686 
5687 	/* HDP flush */
5688 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
5689 
5690 	/* bits 0-15 are the VM contexts 0-15 */
5691 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5692 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5693 				 WRITE_DATA_DST_SEL(0)));
5694 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5695 	radeon_ring_write(ring, 0);
5696 	radeon_ring_write(ring, 1 << vm->id);
5697 
5698 	/* compute doesn't have PFP */
5699 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5700 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5701 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5702 		radeon_ring_write(ring, 0x0);
5703 	}
5704 }
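
/*
 * The WRITE_DATA sequences in cik_vm_flush() all share the same 5-dword
 * shape.  A minimal sketch of that pattern; the helper name is
 * hypothetical and not part of the driver:
 */
static inline void cik_example_emit_reg_write(struct radeon_ring *ring,
					      u32 reg, u32 val)
{
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, reg >> 2);	/* dword register offset */
	radeon_ring_write(ring, 0);		/* high bits, unused for regs */
	radeon_ring_write(ring, val);		/* value to write */
}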
5705 
5706 /*
5707  * RLC
5708  * The RLC is a multi-purpose microengine that handles a
5709  * variety of functions, the most important of which is
5710  * the interrupt controller.
5711  */
5712 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5713 					  bool enable)
5714 {
5715 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5716 
5717 	if (enable)
5718 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5719 	else
5720 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5721 	WREG32(CP_INT_CNTL_RING0, tmp);
5722 }
5723 
5724 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5725 {
5726 	u32 tmp;
5727 
5728 	tmp = RREG32(RLC_LB_CNTL);
5729 	if (enable)
5730 		tmp |= LOAD_BALANCE_ENABLE;
5731 	else
5732 		tmp &= ~LOAD_BALANCE_ENABLE;
5733 	WREG32(RLC_LB_CNTL, tmp);
5734 }
5735 
5736 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5737 {
5738 	u32 i, j, k;
5739 	u32 mask;
5740 
5741 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5742 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5743 			cik_select_se_sh(rdev, i, j);
5744 			for (k = 0; k < rdev->usec_timeout; k++) {
5745 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5746 					break;
5747 				udelay(1);
5748 			}
5749 		}
5750 	}
5751 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5752 
5753 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5754 	for (k = 0; k < rdev->usec_timeout; k++) {
5755 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5756 			break;
5757 		udelay(1);
5758 	}
5759 }
5760 
5761 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5762 {
5763 	u32 tmp;
5764 
5765 	tmp = RREG32(RLC_CNTL);
5766 	if (tmp != rlc)
5767 		WREG32(RLC_CNTL, rlc);
5768 }
5769 
5770 static u32 cik_halt_rlc(struct radeon_device *rdev)
5771 {
5772 	u32 data, orig;
5773 
5774 	orig = data = RREG32(RLC_CNTL);
5775 
5776 	if (data & RLC_ENABLE) {
5777 		u32 i;
5778 
5779 		data &= ~RLC_ENABLE;
5780 		WREG32(RLC_CNTL, data);
5781 
5782 		for (i = 0; i < rdev->usec_timeout; i++) {
5783 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5784 				break;
5785 			udelay(1);
5786 		}
5787 
5788 		cik_wait_for_rlc_serdes(rdev);
5789 	}
5790 
5791 	return orig;
5792 }
5793 
5794 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5795 {
5796 	u32 tmp, i, mask;
5797 
5798 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5799 	WREG32(RLC_GPR_REG2, tmp);
5800 
5801 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5802 	for (i = 0; i < rdev->usec_timeout; i++) {
5803 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5804 			break;
5805 		udelay(1);
5806 	}
5807 
5808 	for (i = 0; i < rdev->usec_timeout; i++) {
5809 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5810 			break;
5811 		udelay(1);
5812 	}
5813 }
5814 
5815 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5816 {
5817 	u32 tmp;
5818 
5819 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5820 	WREG32(RLC_GPR_REG2, tmp);
5821 }
5822 
5823 /**
5824  * cik_rlc_stop - stop the RLC ME
5825  *
5826  * @rdev: radeon_device pointer
5827  *
5828  * Halt the RLC ME (MicroEngine) (CIK).
5829  */
5830 static void cik_rlc_stop(struct radeon_device *rdev)
5831 {
5832 	WREG32(RLC_CNTL, 0);
5833 
5834 	cik_enable_gui_idle_interrupt(rdev, false);
5835 
5836 	cik_wait_for_rlc_serdes(rdev);
5837 }
5838 
5839 /**
5840  * cik_rlc_start - start the RLC ME
5841  *
5842  * @rdev: radeon_device pointer
5843  *
5844  * Unhalt the RLC ME (MicroEngine) (CIK).
5845  */
5846 static void cik_rlc_start(struct radeon_device *rdev)
5847 {
5848 	WREG32(RLC_CNTL, RLC_ENABLE);
5849 
5850 	cik_enable_gui_idle_interrupt(rdev, true);
5851 
5852 	udelay(50);
5853 }
5854 
5855 /**
5856  * cik_rlc_resume - setup the RLC hw
5857  *
5858  * @rdev: radeon_device pointer
5859  *
5860  * Initialize the RLC registers, load the ucode,
5861  * and start the RLC (CIK).
5862  * Returns 0 for success, -EINVAL if the ucode is not available.
5863  */
5864 static int cik_rlc_resume(struct radeon_device *rdev)
5865 {
5866 	u32 i, size, tmp;
5867 	const __be32 *fw_data;
5868 
5869 	if (!rdev->rlc_fw)
5870 		return -EINVAL;
5871 
5872 	switch (rdev->family) {
5873 	case CHIP_BONAIRE:
5874 	case CHIP_HAWAII:
5875 	default:
5876 		size = BONAIRE_RLC_UCODE_SIZE;
5877 		break;
5878 	case CHIP_KAVERI:
5879 		size = KV_RLC_UCODE_SIZE;
5880 		break;
5881 	case CHIP_KABINI:
5882 		size = KB_RLC_UCODE_SIZE;
5883 		break;
5884 	case CHIP_MULLINS:
5885 		size = ML_RLC_UCODE_SIZE;
5886 		break;
5887 	}
5888 
5889 	cik_rlc_stop(rdev);
5890 
5891 	/* disable CG */
5892 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5893 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5894 
5895 	si_rlc_reset(rdev);
5896 
5897 	cik_init_pg(rdev);
5898 
5899 	cik_init_cg(rdev);
5900 
5901 	WREG32(RLC_LB_CNTR_INIT, 0);
5902 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5903 
5904 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5905 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5906 	WREG32(RLC_LB_PARAMS, 0x00600408);
5907 	WREG32(RLC_LB_CNTL, 0x80000004);
5908 
5909 	WREG32(RLC_MC_CNTL, 0);
5910 	WREG32(RLC_UCODE_CNTL, 0);
5911 
5912 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5913 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5914 	for (i = 0; i < size; i++)
5915 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5916 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5917 
5918 	/* XXX - find out what chips support lbpw */
5919 	cik_enable_lbpw(rdev, false);
5920 
5921 	if (rdev->family == CHIP_BONAIRE)
5922 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5923 
5924 	cik_rlc_start(rdev);
5925 
5926 	return 0;
5927 }
5928 
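/*
 * The clock/power gating helpers below share a read-modify-write idiom:
 * snapshot the register, adjust a local copy, and write back only when
 * something actually changed, avoiding redundant register writes:
 *
 *	orig = data = RREG32(reg);
 *	data |= SOME_ENABLE_BIT;
 *	if (orig != data)
 *		WREG32(reg, data);
 */
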
5929 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5930 {
5931 	u32 data, orig, tmp, tmp2;
5932 
5933 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5934 
5935 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5936 		cik_enable_gui_idle_interrupt(rdev, true);
5937 
5938 		tmp = cik_halt_rlc(rdev);
5939 
5940 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5941 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5942 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5943 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5944 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5945 
5946 		cik_update_rlc(rdev, tmp);
5947 
5948 		data |= CGCG_EN | CGLS_EN;
5949 	} else {
5950 		cik_enable_gui_idle_interrupt(rdev, false);
5951 
5952 		RREG32(CB_CGTT_SCLK_CTRL);
5953 		RREG32(CB_CGTT_SCLK_CTRL);
5954 		RREG32(CB_CGTT_SCLK_CTRL);
5955 		RREG32(CB_CGTT_SCLK_CTRL);
5956 
5957 		data &= ~(CGCG_EN | CGLS_EN);
5958 	}
5959 
5960 	if (orig != data)
5961 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5963 }
5964 
5965 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5966 {
5967 	u32 data, orig, tmp = 0;
5968 
5969 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5970 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5971 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5972 				orig = data = RREG32(CP_MEM_SLP_CNTL);
5973 				data |= CP_MEM_LS_EN;
5974 				if (orig != data)
5975 					WREG32(CP_MEM_SLP_CNTL, data);
5976 			}
5977 		}
5978 
5979 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5980 		data &= 0xfffffffd;
5981 		if (orig != data)
5982 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5983 
5984 		tmp = cik_halt_rlc(rdev);
5985 
5986 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5987 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5988 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5989 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5990 		WREG32(RLC_SERDES_WR_CTRL, data);
5991 
5992 		cik_update_rlc(rdev, tmp);
5993 
5994 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5995 			orig = data = RREG32(CGTS_SM_CTRL_REG);
5996 			data &= ~SM_MODE_MASK;
5997 			data |= SM_MODE(0x2);
5998 			data |= SM_MODE_ENABLE;
5999 			data &= ~CGTS_OVERRIDE;
6000 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6001 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6002 				data &= ~CGTS_LS_OVERRIDE;
6003 			data &= ~ON_MONITOR_ADD_MASK;
6004 			data |= ON_MONITOR_ADD_EN;
6005 			data |= ON_MONITOR_ADD(0x96);
6006 			if (orig != data)
6007 				WREG32(CGTS_SM_CTRL_REG, data);
6008 		}
6009 	} else {
6010 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6011 		data |= 0x00000002;
6012 		if (orig != data)
6013 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6014 
6015 		data = RREG32(RLC_MEM_SLP_CNTL);
6016 		if (data & RLC_MEM_LS_EN) {
6017 			data &= ~RLC_MEM_LS_EN;
6018 			WREG32(RLC_MEM_SLP_CNTL, data);
6019 		}
6020 
6021 		data = RREG32(CP_MEM_SLP_CNTL);
6022 		if (data & CP_MEM_LS_EN) {
6023 			data &= ~CP_MEM_LS_EN;
6024 			WREG32(CP_MEM_SLP_CNTL, data);
6025 		}
6026 
6027 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6028 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6029 		if (orig != data)
6030 			WREG32(CGTS_SM_CTRL_REG, data);
6031 
6032 		tmp = cik_halt_rlc(rdev);
6033 
6034 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6035 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6036 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6037 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6038 		WREG32(RLC_SERDES_WR_CTRL, data);
6039 
6040 		cik_update_rlc(rdev, tmp);
6041 	}
6042 }
6043 
6044 static const u32 mc_cg_registers[] =
6045 {
6046 	MC_HUB_MISC_HUB_CG,
6047 	MC_HUB_MISC_SIP_CG,
6048 	MC_HUB_MISC_VM_CG,
6049 	MC_XPB_CLK_GAT,
6050 	ATC_MISC_CG,
6051 	MC_CITF_MISC_WR_CG,
6052 	MC_CITF_MISC_RD_CG,
6053 	MC_CITF_MISC_VM_CG,
6054 	VM_L2_CG,
6055 };
6056 
6057 static void cik_enable_mc_ls(struct radeon_device *rdev,
6058 			     bool enable)
6059 {
6060 	int i;
6061 	u32 orig, data;
6062 
6063 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6064 		orig = data = RREG32(mc_cg_registers[i]);
6065 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6066 			data |= MC_LS_ENABLE;
6067 		else
6068 			data &= ~MC_LS_ENABLE;
6069 		if (data != orig)
6070 			WREG32(mc_cg_registers[i], data);
6071 	}
6072 }
6073 
6074 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6075 			       bool enable)
6076 {
6077 	int i;
6078 	u32 orig, data;
6079 
6080 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6081 		orig = data = RREG32(mc_cg_registers[i]);
6082 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6083 			data |= MC_CG_ENABLE;
6084 		else
6085 			data &= ~MC_CG_ENABLE;
6086 		if (data != orig)
6087 			WREG32(mc_cg_registers[i], data);
6088 	}
6089 }
6090 
6091 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6092 				 bool enable)
6093 {
6094 	u32 orig, data;
6095 
6096 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6097 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6098 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6099 	} else {
6100 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6101 		data |= 0xff000000;
6102 		if (data != orig)
6103 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6104 
6105 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6106 		data |= 0xff000000;
6107 		if (data != orig)
6108 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6109 	}
6110 }
6111 
6112 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6113 				 bool enable)
6114 {
6115 	u32 orig, data;
6116 
6117 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6118 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6119 		data |= 0x100;
6120 		if (orig != data)
6121 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6122 
6123 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6124 		data |= 0x100;
6125 		if (orig != data)
6126 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6127 	} else {
6128 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6129 		data &= ~0x100;
6130 		if (orig != data)
6131 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6132 
6133 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6134 		data &= ~0x100;
6135 		if (orig != data)
6136 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6137 	}
6138 }
6139 
6140 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6141 				bool enable)
6142 {
6143 	u32 orig, data;
6144 
6145 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6146 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6147 		data = 0xfff;
6148 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6149 
6150 		orig = data = RREG32(UVD_CGC_CTRL);
6151 		data |= DCM;
6152 		if (orig != data)
6153 			WREG32(UVD_CGC_CTRL, data);
6154 	} else {
6155 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6156 		data &= ~0xfff;
6157 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6158 
6159 		orig = data = RREG32(UVD_CGC_CTRL);
6160 		data &= ~DCM;
6161 		if (orig != data)
6162 			WREG32(UVD_CGC_CTRL, data);
6163 	}
6164 }
6165 
6166 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6167 			       bool enable)
6168 {
6169 	u32 orig, data;
6170 
6171 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6172 
6173 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6174 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6175 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6176 	else
6177 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6178 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6179 
6180 	if (orig != data)
6181 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6182 }
6183 
6184 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6185 				bool enable)
6186 {
6187 	u32 orig, data;
6188 
6189 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6190 
6191 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6192 		data &= ~CLOCK_GATING_DIS;
6193 	else
6194 		data |= CLOCK_GATING_DIS;
6195 
6196 	if (orig != data)
6197 		WREG32(HDP_HOST_PATH_CNTL, data);
6198 }
6199 
6200 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6201 			      bool enable)
6202 {
6203 	u32 orig, data;
6204 
6205 	orig = data = RREG32(HDP_MEM_POWER_LS);
6206 
6207 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6208 		data |= HDP_LS_ENABLE;
6209 	else
6210 		data &= ~HDP_LS_ENABLE;
6211 
6212 	if (orig != data)
6213 		WREG32(HDP_MEM_POWER_LS, data);
6214 }
6215 
6216 void cik_update_cg(struct radeon_device *rdev,
6217 		   u32 block, bool enable)
6218 {
6220 	if (block & RADEON_CG_BLOCK_GFX) {
6221 		cik_enable_gui_idle_interrupt(rdev, false);
6222 		/* order matters! */
6223 		if (enable) {
6224 			cik_enable_mgcg(rdev, true);
6225 			cik_enable_cgcg(rdev, true);
6226 		} else {
6227 			cik_enable_cgcg(rdev, false);
6228 			cik_enable_mgcg(rdev, false);
6229 		}
6230 		cik_enable_gui_idle_interrupt(rdev, true);
6231 	}
6232 
6233 	if (block & RADEON_CG_BLOCK_MC) {
6234 		if (!(rdev->flags & RADEON_IS_IGP)) {
6235 			cik_enable_mc_mgcg(rdev, enable);
6236 			cik_enable_mc_ls(rdev, enable);
6237 		}
6238 	}
6239 
6240 	if (block & RADEON_CG_BLOCK_SDMA) {
6241 		cik_enable_sdma_mgcg(rdev, enable);
6242 		cik_enable_sdma_mgls(rdev, enable);
6243 	}
6244 
6245 	if (block & RADEON_CG_BLOCK_BIF) {
6246 		cik_enable_bif_mgls(rdev, enable);
6247 	}
6248 
6249 	if (block & RADEON_CG_BLOCK_UVD) {
6250 		if (rdev->has_uvd)
6251 			cik_enable_uvd_mgcg(rdev, enable);
6252 	}
6253 
6254 	if (block & RADEON_CG_BLOCK_HDP) {
6255 		cik_enable_hdp_mgcg(rdev, enable);
6256 		cik_enable_hdp_ls(rdev, enable);
6257 	}
6258 
6259 	if (block & RADEON_CG_BLOCK_VCE) {
6260 		vce_v2_0_enable_mgcg(rdev, enable);
6261 	}
6262 }
6263 
6264 static void cik_init_cg(struct radeon_device *rdev)
6265 {
6267 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6268 
6269 	if (rdev->has_uvd)
6270 		si_init_uvd_internal_cg(rdev);
6271 
6272 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6273 			     RADEON_CG_BLOCK_SDMA |
6274 			     RADEON_CG_BLOCK_BIF |
6275 			     RADEON_CG_BLOCK_UVD |
6276 			     RADEON_CG_BLOCK_HDP), true);
6277 }
6278 
6279 static void cik_fini_cg(struct radeon_device *rdev)
6280 {
6281 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6282 			     RADEON_CG_BLOCK_SDMA |
6283 			     RADEON_CG_BLOCK_BIF |
6284 			     RADEON_CG_BLOCK_UVD |
6285 			     RADEON_CG_BLOCK_HDP), false);
6286 
6287 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6288 }
6289 
6290 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6291 					  bool enable)
6292 {
6293 	u32 data, orig;
6294 
6295 	orig = data = RREG32(RLC_PG_CNTL);
6296 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6297 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6298 	else
6299 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6300 	if (orig != data)
6301 		WREG32(RLC_PG_CNTL, data);
6302 }
6303 
6304 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6305 					  bool enable)
6306 {
6307 	u32 data, orig;
6308 
6309 	orig = data = RREG32(RLC_PG_CNTL);
6310 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6311 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6312 	else
6313 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6314 	if (orig != data)
6315 		WREG32(RLC_PG_CNTL, data);
6316 }
6317 
6318 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6319 {
6320 	u32 data, orig;
6321 
6322 	orig = data = RREG32(RLC_PG_CNTL);
6323 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6324 		data &= ~DISABLE_CP_PG;
6325 	else
6326 		data |= DISABLE_CP_PG;
6327 	if (orig != data)
6328 		WREG32(RLC_PG_CNTL, data);
6329 }
6330 
6331 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6332 {
6333 	u32 data, orig;
6334 
6335 	orig = data = RREG32(RLC_PG_CNTL);
6336 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6337 		data &= ~DISABLE_GDS_PG;
6338 	else
6339 		data |= DISABLE_GDS_PG;
6340 	if (orig != data)
6341 		WREG32(RLC_PG_CNTL, data);
6342 }
6343 
6344 #define CP_ME_TABLE_SIZE    96
6345 #define CP_ME_TABLE_OFFSET  2048
6346 #define CP_MEC_TABLE_OFFSET 4096
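
/* The table offsets above are dword indices into the firmware image
 * (fw_data below is an array of __be32), so e.g. CP_ME_TABLE_OFFSET
 * 2048 corresponds to byte offset 8192.
 */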
6347 
6348 void cik_init_cp_pg_table(struct radeon_device *rdev)
6349 {
6350 	const __be32 *fw_data;
6351 	volatile u32 *dst_ptr;
6352 	int me, i, max_me = 4;
6353 	u32 bo_offset = 0;
6354 	u32 table_offset;
6355 
6356 	if (rdev->family == CHIP_KAVERI)
6357 		max_me = 5;
6358 
6359 	if (rdev->rlc.cp_table_ptr == NULL)
6360 		return;
6361 
6362 	/* write the cp table buffer */
6363 	dst_ptr = rdev->rlc.cp_table_ptr;
6364 	for (me = 0; me < max_me; me++) {
6365 		if (me == 0) {
6366 			fw_data = (const __be32 *)rdev->ce_fw->data;
6367 			table_offset = CP_ME_TABLE_OFFSET;
6368 		} else if (me == 1) {
6369 			fw_data = (const __be32 *)rdev->pfp_fw->data;
6370 			table_offset = CP_ME_TABLE_OFFSET;
6371 		} else if (me == 2) {
6372 			fw_data = (const __be32 *)rdev->me_fw->data;
6373 			table_offset = CP_ME_TABLE_OFFSET;
6374 		} else {
6375 			fw_data = (const __be32 *)rdev->mec_fw->data;
6376 			table_offset = CP_MEC_TABLE_OFFSET;
6377 		}
6378 
6379 		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
6380 			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6381 		}
6382 		bo_offset += CP_ME_TABLE_SIZE;
6383 	}
6384 }
6385 
6386 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6387 				bool enable)
6388 {
6389 	u32 data, orig;
6390 
6391 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6392 		orig = data = RREG32(RLC_PG_CNTL);
6393 		data |= GFX_PG_ENABLE;
6394 		if (orig != data)
6395 			WREG32(RLC_PG_CNTL, data);
6396 
6397 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6398 		data |= AUTO_PG_EN;
6399 		if (orig != data)
6400 			WREG32(RLC_AUTO_PG_CTRL, data);
6401 	} else {
6402 		orig = data = RREG32(RLC_PG_CNTL);
6403 		data &= ~GFX_PG_ENABLE;
6404 		if (orig != data)
6405 			WREG32(RLC_PG_CNTL, data);
6406 
6407 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6408 		data &= ~AUTO_PG_EN;
6409 		if (orig != data)
6410 			WREG32(RLC_AUTO_PG_CTRL, data);
6411 
6412 		data = RREG32(DB_RENDER_CONTROL);
6413 	}
6414 }
6415 
6416 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6417 {
6418 	u32 mask = 0, tmp, tmp1;
6419 	int i;
6420 
6421 	cik_select_se_sh(rdev, se, sh);
6422 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6423 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6424 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6425 
6426 	tmp &= 0xffff0000;
6427 
6428 	tmp |= tmp1;
6429 	tmp >>= 16;
6430 
6431 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6432 		mask <<= 1;
6433 		mask |= 1;
6434 	}
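	/* e.g. with max_cu_per_sh == 8 the loop above yields mask == 0xff;
	 * (~tmp) & mask below then gives the bitmap of CUs that are not
	 * disabled, per the function's name
	 */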
6435 
6436 	return (~tmp) & mask;
6437 }
6438 
6439 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6440 {
6441 	u32 i, j, k, active_cu_number = 0;
6442 	u32 mask, counter, cu_bitmap;
6443 	u32 tmp = 0;
6444 
6445 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6446 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6447 			mask = 1;
6448 			cu_bitmap = 0;
6449 			counter = 0;
6450 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6451 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6452 					if (counter < 2)
6453 						cu_bitmap |= mask;
6454 					counter++;
6455 				}
6456 				mask <<= 1;
6457 			}
6458 
6459 			active_cu_number += counter;
6460 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6461 		}
6462 	}
6463 
6464 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6465 
6466 	tmp = RREG32(RLC_MAX_PG_CU);
6467 	tmp &= ~MAX_PU_CU_MASK;
6468 	tmp |= MAX_PU_CU(active_cu_number);
6469 	WREG32(RLC_MAX_PG_CU, tmp);
6470 }
6471 
6472 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6473 				       bool enable)
6474 {
6475 	u32 data, orig;
6476 
6477 	orig = data = RREG32(RLC_PG_CNTL);
6478 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6479 		data |= STATIC_PER_CU_PG_ENABLE;
6480 	else
6481 		data &= ~STATIC_PER_CU_PG_ENABLE;
6482 	if (orig != data)
6483 		WREG32(RLC_PG_CNTL, data);
6484 }
6485 
6486 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6487 					bool enable)
6488 {
6489 	u32 data, orig;
6490 
6491 	orig = data = RREG32(RLC_PG_CNTL);
6492 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6493 		data |= DYN_PER_CU_PG_ENABLE;
6494 	else
6495 		data &= ~DYN_PER_CU_PG_ENABLE;
6496 	if (orig != data)
6497 		WREG32(RLC_PG_CNTL, data);
6498 }
6499 
6500 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6501 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6502 
6503 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6504 {
6505 	u32 data, orig;
6506 	u32 i;
6507 
6508 	if (rdev->rlc.cs_data) {
6509 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6510 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6511 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6512 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6513 	} else {
6514 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6515 		for (i = 0; i < 3; i++)
6516 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6517 	}
6518 	if (rdev->rlc.reg_list) {
6519 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6520 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6521 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6522 	}
6523 
6524 	orig = data = RREG32(RLC_PG_CNTL);
6525 	data |= GFX_PG_SRC;
6526 	if (orig != data)
6527 		WREG32(RLC_PG_CNTL, data);
6528 
6529 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6530 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6531 
6532 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6533 	data &= ~IDLE_POLL_COUNT_MASK;
6534 	data |= IDLE_POLL_COUNT(0x60);
6535 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6536 
6537 	data = 0x10101010;
6538 	WREG32(RLC_PG_DELAY, data);
6539 
6540 	data = RREG32(RLC_PG_DELAY_2);
6541 	data &= ~0xff;
6542 	data |= 0x3;
6543 	WREG32(RLC_PG_DELAY_2, data);
6544 
6545 	data = RREG32(RLC_AUTO_PG_CTRL);
6546 	data &= ~GRBM_REG_SGIT_MASK;
6547 	data |= GRBM_REG_SGIT(0x700);
6548 	WREG32(RLC_AUTO_PG_CTRL, data);
6550 }
6551 
6552 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6553 {
6554 	cik_enable_gfx_cgpg(rdev, enable);
6555 	cik_enable_gfx_static_mgpg(rdev, enable);
6556 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6557 }
6558 
6559 u32 cik_get_csb_size(struct radeon_device *rdev)
6560 {
6561 	u32 count = 0;
6562 	const struct cs_section_def *sect = NULL;
6563 	const struct cs_extent_def *ext = NULL;
6564 
6565 	if (rdev->rlc.cs_data == NULL)
6566 		return 0;
6567 
6568 	/* begin clear state */
6569 	count += 2;
6570 	/* context control state */
6571 	count += 3;
6572 
6573 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6574 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6575 			if (sect->id == SECT_CONTEXT)
6576 				count += 2 + ext->reg_count;
6577 			else
6578 				return 0;
6579 		}
6580 	}
6581 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6582 	count += 4;
6583 	/* end clear state */
6584 	count += 2;
6585 	/* clear state */
6586 	count += 2;
6587 
6588 	return count;
6589 }
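
/*
 * Worked example of the sizing above: one SECT_CONTEXT section with a
 * single extent of N registers gives 2 (begin) + 3 (context control) +
 * (2 + N) (SET_CONTEXT_REG) + 4 (raster config) + 2 (end) + 2 (clear
 * state) = 15 + N dwords, matching what cik_get_csb_buffer() emits.
 */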
6590 
6591 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6592 {
6593 	u32 count = 0, i;
6594 	const struct cs_section_def *sect = NULL;
6595 	const struct cs_extent_def *ext = NULL;
6596 
6597 	if (rdev->rlc.cs_data == NULL)
6598 		return;
6599 	if (buffer == NULL)
6600 		return;
6601 
6602 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6603 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6604 
6605 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6606 	buffer[count++] = cpu_to_le32(0x80000000);
6607 	buffer[count++] = cpu_to_le32(0x80000000);
6608 
6609 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6610 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6611 			if (sect->id == SECT_CONTEXT) {
6612 				buffer[count++] =
6613 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6614 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6615 				for (i = 0; i < ext->reg_count; i++)
6616 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6617 			} else {
6618 				return;
6619 			}
6620 		}
6621 	}
6622 
6623 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6624 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6625 	switch (rdev->family) {
6626 	case CHIP_BONAIRE:
6627 		buffer[count++] = cpu_to_le32(0x16000012);
6628 		buffer[count++] = cpu_to_le32(0x00000000);
6629 		break;
6630 	case CHIP_KAVERI:
6631 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6632 		buffer[count++] = cpu_to_le32(0x00000000);
6633 		break;
6634 	case CHIP_KABINI:
6635 	case CHIP_MULLINS:
6636 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6637 		buffer[count++] = cpu_to_le32(0x00000000);
6638 		break;
6639 	case CHIP_HAWAII:
6640 		buffer[count++] = cpu_to_le32(0x3a00161a);
6641 		buffer[count++] = cpu_to_le32(0x0000002e);
6642 		break;
6643 	default:
6644 		buffer[count++] = cpu_to_le32(0x00000000);
6645 		buffer[count++] = cpu_to_le32(0x00000000);
6646 		break;
6647 	}
6648 
6649 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6650 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6651 
6652 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6653 	buffer[count++] = cpu_to_le32(0);
6654 }
6655 
6656 static void cik_init_pg(struct radeon_device *rdev)
6657 {
6658 	if (rdev->pg_flags) {
6659 		cik_enable_sck_slowdown_on_pu(rdev, true);
6660 		cik_enable_sck_slowdown_on_pd(rdev, true);
6661 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6662 			cik_init_gfx_cgpg(rdev);
6663 			cik_enable_cp_pg(rdev, true);
6664 			cik_enable_gds_pg(rdev, true);
6665 		}
6666 		cik_init_ao_cu_mask(rdev);
6667 		cik_update_gfx_pg(rdev, true);
6668 	}
6669 }
6670 
6671 static void cik_fini_pg(struct radeon_device *rdev)
6672 {
6673 	if (rdev->pg_flags) {
6674 		cik_update_gfx_pg(rdev, false);
6675 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6676 			cik_enable_cp_pg(rdev, false);
6677 			cik_enable_gds_pg(rdev, false);
6678 		}
6679 	}
6680 }
6681 
6682 /*
6683  * Interrupts
6684  * Starting with r6xx, interrupts are handled via a ring buffer.
6685  * Ring buffers are areas of GPU accessible memory that the GPU
6686  * writes interrupt vectors into and the host reads vectors out of.
6687  * There is a rptr (read pointer) that determines where the
6688  * host is currently reading, and a wptr (write pointer)
6689  * which determines where the GPU has written.  When the
6690  * pointers are equal, the ring is idle.  When the GPU
6691  * writes vectors to the ring buffer, it increments the
6692  * wptr.  When there is an interrupt, the host then starts
6693  * fetching vectors and processing them until the pointers are
6694  * equal again at which point it updates the rptr.
6695  */
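
/*
 * A minimal sketch of the consumer side described above (illustrative
 * only; the real handler also deals with wptr overflow and write-back):
 *
 *	rptr = rdev->ih.rptr;
 *	while (rptr != wptr) {
 *		src_id = le32_to_cpu(rdev->ih.ring[rptr / 4]) & 0xff;
 *		(dispatch on src_id / src_data ...)
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;	(16-byte vectors)
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */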
6696 
6697 /**
6698  * cik_enable_interrupts - Enable the interrupt ring buffer
6699  *
6700  * @rdev: radeon_device pointer
6701  *
6702  * Enable the interrupt ring buffer (CIK).
6703  */
6704 static void cik_enable_interrupts(struct radeon_device *rdev)
6705 {
6706 	u32 ih_cntl = RREG32(IH_CNTL);
6707 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6708 
6709 	ih_cntl |= ENABLE_INTR;
6710 	ih_rb_cntl |= IH_RB_ENABLE;
6711 	WREG32(IH_CNTL, ih_cntl);
6712 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6713 	rdev->ih.enabled = true;
6714 }
6715 
6716 /**
6717  * cik_disable_interrupts - Disable the interrupt ring buffer
6718  *
6719  * @rdev: radeon_device pointer
6720  *
6721  * Disable the interrupt ring buffer (CIK).
6722  */
6723 static void cik_disable_interrupts(struct radeon_device *rdev)
6724 {
6725 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6726 	u32 ih_cntl = RREG32(IH_CNTL);
6727 
6728 	ih_rb_cntl &= ~IH_RB_ENABLE;
6729 	ih_cntl &= ~ENABLE_INTR;
6730 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6731 	WREG32(IH_CNTL, ih_cntl);
6732 	/* set rptr, wptr to 0 */
6733 	WREG32(IH_RB_RPTR, 0);
6734 	WREG32(IH_RB_WPTR, 0);
6735 	rdev->ih.enabled = false;
6736 	rdev->ih.rptr = 0;
6737 }
6738 
6739 /**
6740  * cik_disable_interrupt_state - Disable all interrupt sources
6741  *
6742  * @rdev: radeon_device pointer
6743  *
6744  * Clear all interrupt enable bits used by the driver (CIK).
6745  */
6746 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6747 {
6748 	u32 tmp;
6749 
6750 	/* gfx ring */
6751 	tmp = RREG32(CP_INT_CNTL_RING0) &
6752 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6753 	WREG32(CP_INT_CNTL_RING0, tmp);
6754 	/* sdma */
6755 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6756 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6757 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6758 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6759 	/* compute queues */
6760 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6761 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6762 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6763 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6764 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6765 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6766 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6767 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6768 	/* grbm */
6769 	WREG32(GRBM_INT_CNTL, 0);
6770 	/* vline/vblank, etc. */
6771 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6772 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6773 	if (rdev->num_crtc >= 4) {
6774 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6775 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6776 	}
6777 	if (rdev->num_crtc >= 6) {
6778 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6779 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6780 	}
6781 	/* pflip */
6782 	if (rdev->num_crtc >= 2) {
6783 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6784 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6785 	}
6786 	if (rdev->num_crtc >= 4) {
6787 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6788 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6789 	}
6790 	if (rdev->num_crtc >= 6) {
6791 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6792 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6793 	}
6794 
6795 	/* dac hotplug */
6796 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6797 
6798 	/* digital hotplug */
6799 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6800 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6801 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6802 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6803 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6804 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6805 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6806 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6807 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6808 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6809 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6810 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6811 
6812 }
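
/*
 * Note: the DC_HPDx_INT_CONTROL updates above mask the read value with
 * DC_HPDx_INT_POLARITY, so only the programmed pin polarity survives
 * while the enable and ack bits are cleared; re-enabling hotplug later
 * does not have to re-derive the connector's sense.
 */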
6813 
6814 /**
6815  * cik_irq_init - init and enable the interrupt ring
6816  *
6817  * @rdev: radeon_device pointer
6818  *
6819  * Allocate a ring buffer for the interrupt controller,
6820  * enable the RLC, disable interrupts, program the IH
6821  * ring buffer, and then enable interrupts (CIK).
6822  * Called at device load and resume.
6823  * Returns 0 for success, errors for failure.
6824  */
6825 static int cik_irq_init(struct radeon_device *rdev)
6826 {
6827 	int ret = 0;
6828 	int rb_bufsz;
6829 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6830 
6831 	/* allocate ring */
6832 	ret = r600_ih_ring_alloc(rdev);
6833 	if (ret)
6834 		return ret;
6835 
6836 	/* disable irqs */
6837 	cik_disable_interrupts(rdev);
6838 
6839 	/* init rlc */
6840 	ret = cik_rlc_resume(rdev);
6841 	if (ret) {
6842 		r600_ih_ring_fini(rdev);
6843 		return ret;
6844 	}
6845 
6846 	/* setup interrupt control */
6847 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6848 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6849 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6850 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6851 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6852 	 */
6853 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6854 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6855 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6856 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6857 
6858 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6859 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6860 
6861 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6862 		      IH_WPTR_OVERFLOW_CLEAR |
6863 		      (rb_bufsz << 1));
6864 
6865 	if (rdev->wb.enabled)
6866 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6867 
6868 	/* set the writeback address whether it's enabled or not */
6869 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6870 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6871 
6872 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6873 
6874 	/* set rptr, wptr to 0 */
6875 	WREG32(IH_RB_RPTR, 0);
6876 	WREG32(IH_RB_WPTR, 0);
6877 
6878 	/* Default settings for IH_CNTL (disabled at first) */
6879 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6880 	/* RPTR_REARM only works if msi's are enabled */
6881 	if (rdev->msi_enabled)
6882 		ih_cntl |= RPTR_REARM;
6883 	WREG32(IH_CNTL, ih_cntl);
6884 
6885 	/* force the active interrupt state to all disabled */
6886 	cik_disable_interrupt_state(rdev);
6887 
6888 	pci_set_master(rdev->pdev);
6889 
6890 	/* enable irqs */
6891 	cik_enable_interrupts(rdev);
6892 
6893 	return ret;
6894 }
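
/*
 * Sizing sketch for the IH ring programmed above, assuming the 64 KB
 * ring that cik_init() requests via r600_ih_ring_init() (the numbers
 * below are illustrative, derived from that assumption):
 *
 *   ring_size  = 64 * 1024 bytes
 *   rb_bufsz   = order_base_2(ring_size / 4) = order_base_2(16384) = 14
 *   IH_RB_CNTL = overflow bits | (rb_bufsz << 1)
 *   entries    = ring_size / 16 = 4096 (each IV entry is 16 bytes)
 *   ptr_mask   = ring_size - 1 = 0xFFFF (rptr/wptr are byte offsets)
 */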
6895 
6896 /**
6897  * cik_irq_set - enable/disable interrupt sources
6898  *
6899  * @rdev: radeon_device pointer
6900  *
6901  * Enable interrupt sources on the GPU (vblanks, hpd,
6902  * etc.) (CIK).
6903  * Returns 0 for success, errors for failure.
6904  */
6905 int cik_irq_set(struct radeon_device *rdev)
6906 {
6907 	u32 cp_int_cntl;
6908 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6909 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6910 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6911 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6912 	u32 grbm_int_cntl = 0;
6913 	u32 dma_cntl, dma_cntl1;
6914 	u32 thermal_int;
6915 
6916 	if (!rdev->irq.installed) {
6917 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6918 		return -EINVAL;
6919 	}
6920 	/* don't enable anything if the ih is disabled */
6921 	if (!rdev->ih.enabled) {
6922 		cik_disable_interrupts(rdev);
6923 		/* force the active interrupt state to all disabled */
6924 		cik_disable_interrupt_state(rdev);
6925 		return 0;
6926 	}
6927 
6928 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6929 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6930 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6931 
6932 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6933 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6934 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6935 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6936 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6937 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6938 
6939 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6940 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6941 
6942 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6943 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6944 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6945 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6946 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6947 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6948 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6949 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6950 
6951 	if (rdev->flags & RADEON_IS_IGP)
6952 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6953 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6954 	else
6955 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6956 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6957 
6958 	/* enable CP interrupts on all rings */
6959 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6960 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6961 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6962 	}
6963 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6964 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6965 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
6966 		if (ring->me == 1) {
6967 			switch (ring->pipe) {
6968 			case 0:
6969 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6970 				break;
6971 			case 1:
6972 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6973 				break;
6974 			case 2:
6975 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6976 				break;
6977 			case 3:
6978 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6979 				break;
6980 			default:
6981 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6982 				break;
6983 			}
6984 		} else if (ring->me == 2) {
6985 			switch (ring->pipe) {
6986 			case 0:
6987 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6988 				break;
6989 			case 1:
6990 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6991 				break;
6992 			case 2:
6993 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6994 				break;
6995 			case 3:
6996 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6997 				break;
6998 			default:
6999 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7000 				break;
7001 			}
7002 		} else {
7003 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7004 		}
7005 	}
7006 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7007 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7008 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7009 		if (ring->me == 1) {
7010 			switch (ring->pipe) {
7011 			case 0:
7012 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7013 				break;
7014 			case 1:
7015 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7016 				break;
7017 			case 2:
7018 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7019 				break;
7020 			case 3:
7021 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7022 				break;
7023 			default:
7024 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7025 				break;
7026 			}
7027 		} else if (ring->me == 2) {
7028 			switch (ring->pipe) {
7029 			case 0:
7030 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7031 				break;
7032 			case 1:
7033 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7034 				break;
7035 			case 2:
7036 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7037 				break;
7038 			case 3:
7039 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7040 				break;
7041 			default:
7042 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7043 				break;
7044 			}
7045 		} else {
7046 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7047 		}
7048 	}
7049 
7050 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7051 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7052 		dma_cntl |= TRAP_ENABLE;
7053 	}
7054 
7055 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7056 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7057 		dma_cntl1 |= TRAP_ENABLE;
7058 	}
7059 
7060 	if (rdev->irq.crtc_vblank_int[0] ||
7061 	    atomic_read(&rdev->irq.pflip[0])) {
7062 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7063 		crtc1 |= VBLANK_INTERRUPT_MASK;
7064 	}
7065 	if (rdev->irq.crtc_vblank_int[1] ||
7066 	    atomic_read(&rdev->irq.pflip[1])) {
7067 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7068 		crtc2 |= VBLANK_INTERRUPT_MASK;
7069 	}
7070 	if (rdev->irq.crtc_vblank_int[2] ||
7071 	    atomic_read(&rdev->irq.pflip[2])) {
7072 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7073 		crtc3 |= VBLANK_INTERRUPT_MASK;
7074 	}
7075 	if (rdev->irq.crtc_vblank_int[3] ||
7076 	    atomic_read(&rdev->irq.pflip[3])) {
7077 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7078 		crtc4 |= VBLANK_INTERRUPT_MASK;
7079 	}
7080 	if (rdev->irq.crtc_vblank_int[4] ||
7081 	    atomic_read(&rdev->irq.pflip[4])) {
7082 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7083 		crtc5 |= VBLANK_INTERRUPT_MASK;
7084 	}
7085 	if (rdev->irq.crtc_vblank_int[5] ||
7086 	    atomic_read(&rdev->irq.pflip[5])) {
7087 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7088 		crtc6 |= VBLANK_INTERRUPT_MASK;
7089 	}
7090 	if (rdev->irq.hpd[0]) {
7091 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7092 		hpd1 |= DC_HPDx_INT_EN;
7093 	}
7094 	if (rdev->irq.hpd[1]) {
7095 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7096 		hpd2 |= DC_HPDx_INT_EN;
7097 	}
7098 	if (rdev->irq.hpd[2]) {
7099 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7100 		hpd3 |= DC_HPDx_INT_EN;
7101 	}
7102 	if (rdev->irq.hpd[3]) {
7103 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7104 		hpd4 |= DC_HPDx_INT_EN;
7105 	}
7106 	if (rdev->irq.hpd[4]) {
7107 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7108 		hpd5 |= DC_HPDx_INT_EN;
7109 	}
7110 	if (rdev->irq.hpd[5]) {
7111 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7112 		hpd6 |= DC_HPDx_INT_EN;
7113 	}
7114 
7115 	if (rdev->irq.dpm_thermal) {
7116 		DRM_DEBUG("dpm thermal\n");
7117 		if (rdev->flags & RADEON_IS_IGP)
7118 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7119 		else
7120 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7121 	}
7122 
7123 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7124 
7125 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7126 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7127 
7128 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7129 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7130 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7131 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7132 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7133 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7134 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7135 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7136 
7137 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7138 
7139 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7140 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7141 	if (rdev->num_crtc >= 4) {
7142 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7143 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7144 	}
7145 	if (rdev->num_crtc >= 6) {
7146 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7147 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7148 	}
7149 
7150 	if (rdev->num_crtc >= 2) {
7151 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7152 		       GRPH_PFLIP_INT_MASK);
7153 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7154 		       GRPH_PFLIP_INT_MASK);
7155 	}
7156 	if (rdev->num_crtc >= 4) {
7157 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7158 		       GRPH_PFLIP_INT_MASK);
7159 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7160 		       GRPH_PFLIP_INT_MASK);
7161 	}
7162 	if (rdev->num_crtc >= 6) {
7163 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7164 		       GRPH_PFLIP_INT_MASK);
7165 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7166 		       GRPH_PFLIP_INT_MASK);
7167 	}
7168 
7169 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7170 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7171 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7172 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7173 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7174 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7175 
7176 	if (rdev->flags & RADEON_IS_IGP)
7177 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7178 	else
7179 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7180 
7181 	return 0;
7182 }
7183 
7184 /**
7185  * cik_irq_ack - ack interrupt sources
7186  *
7187  * @rdev: radeon_device pointer
7188  *
7189  * Ack interrupt sources on the GPU (vblanks, hpd,
7190  * etc.) (CIK).  Certain interrupt sources are sw
7191  * generated and do not require an explicit ack.
7192  */
7193 static inline void cik_irq_ack(struct radeon_device *rdev)
7194 {
7195 	u32 tmp;
7196 
7197 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7198 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7199 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7200 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7201 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7202 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7203 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7204 
7205 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7206 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7207 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7208 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7209 	if (rdev->num_crtc >= 4) {
7210 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7211 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7212 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7213 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7214 	}
7215 	if (rdev->num_crtc >= 6) {
7216 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7217 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7218 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7219 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7220 	}
7221 
7222 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7223 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7224 		       GRPH_PFLIP_INT_CLEAR);
7225 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7226 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7227 		       GRPH_PFLIP_INT_CLEAR);
7228 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7229 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7230 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7231 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7232 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7233 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7234 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7235 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7236 
7237 	if (rdev->num_crtc >= 4) {
7238 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7239 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7240 			       GRPH_PFLIP_INT_CLEAR);
7241 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7242 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7243 			       GRPH_PFLIP_INT_CLEAR);
7244 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7245 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7246 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7247 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7248 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7249 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7250 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7251 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7252 	}
7253 
7254 	if (rdev->num_crtc >= 6) {
7255 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7256 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7257 			       GRPH_PFLIP_INT_CLEAR);
7258 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7259 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7260 			       GRPH_PFLIP_INT_CLEAR);
7261 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7262 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7263 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7264 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7265 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7266 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7267 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7268 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7269 	}
7270 
7271 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7272 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7273 		tmp |= DC_HPDx_INT_ACK;
7274 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7275 	}
7276 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7277 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7278 		tmp |= DC_HPDx_INT_ACK;
7279 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7280 	}
7281 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7282 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7283 		tmp |= DC_HPDx_INT_ACK;
7284 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7285 	}
7286 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7287 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7288 		tmp |= DC_HPDx_INT_ACK;
7289 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7290 	}
7291 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7292 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7293 		tmp |= DC_HPDx_INT_ACK;
7294 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7295 	}
7296 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7297 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7298 		tmp |= DC_HPDx_INT_ACK;
7299 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7300 	}
7301 }
7302 
7303 /**
7304  * cik_irq_disable - disable interrupts
7305  *
7306  * @rdev: radeon_device pointer
7307  *
7308  * Disable interrupts on the hw (CIK).
7309  */
7310 static void cik_irq_disable(struct radeon_device *rdev)
7311 {
7312 	cik_disable_interrupts(rdev);
7313 	/* Wait and acknowledge irq */
7314 	mdelay(1);
7315 	cik_irq_ack(rdev);
7316 	cik_disable_interrupt_state(rdev);
7317 }
7318 
7319 /**
7320  * cik_irq_suspend - disable interrupts for suspend
7321  *
7322  * @rdev: radeon_device pointer
7323  *
7324  * Disable interrupts and stop the RLC (CIK).
7325  * Used for suspend.
7326  */
7327 static void cik_irq_suspend(struct radeon_device *rdev)
7328 {
7329 	cik_irq_disable(rdev);
7330 	cik_rlc_stop(rdev);
7331 }
7332 
7333 /**
7334  * cik_irq_fini - tear down interrupt support
7335  *
7336  * @rdev: radeon_device pointer
7337  *
7338  * Disable interrupts on the hw and free the IH ring
7339  * buffer (CIK).
7340  * Used for driver unload.
7341  */
7342 static void cik_irq_fini(struct radeon_device *rdev)
7343 {
7344 	cik_irq_suspend(rdev);
7345 	r600_ih_ring_fini(rdev);
7346 }
7347 
7348 /**
7349  * cik_get_ih_wptr - get the IH ring buffer wptr
7350  *
7351  * @rdev: radeon_device pointer
7352  *
7353  * Get the IH ring buffer wptr from either the register
7354  * or the writeback memory buffer (CIK).  Also check for
7355  * ring buffer overflow and deal with it.
7356  * Used by cik_irq_process().
7357  * Returns the value of the wptr.
7358  */
7359 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7360 {
7361 	u32 wptr, tmp;
7362 
7363 	if (rdev->wb.enabled)
7364 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7365 	else
7366 		wptr = RREG32(IH_RB_WPTR);
7367 
7368 	if (wptr & RB_OVERFLOW) {
7369 		/* When a ring buffer overflow happens, start parsing interrupts
7370 		 * from the last vector that was not overwritten (wptr + 16).
7371 		 * Hopefully this should allow us to catch up.
7372 		 */
7373 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7374 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7375 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7376 		tmp = RREG32(IH_RB_CNTL);
7377 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7378 		WREG32(IH_RB_CNTL, tmp);
7379 	}
7380 	return (wptr & rdev->ih.ptr_mask);
7381 }
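
/*
 * Worked example for the overflow path above (a sketch assuming the
 * 64 KB ring, i.e. ptr_mask == 0xFFFF): if RB_OVERFLOW is flagged with
 * the write pointer at byte offset 0x0040, the entry at 0x0040 may just
 * have been clobbered by the wrap, so reading resumes one 16-byte entry
 * later:
 *
 *   rptr = (0x0040 + 16) & 0xFFFF = 0x0050
 *
 * the oldest vector guaranteed not to have been overwritten.
 */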
7382 
7383 /*        CIK IV Ring
7384  * Each IV ring entry is 128 bits:
7385  * [7:0]    - interrupt source id
7386  * [31:8]   - reserved
7387  * [59:32]  - interrupt source data
7388  * [63:60]  - reserved
7389  * [71:64]  - RINGID
7390  *            CP:
7391  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7392  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7393  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7394  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7395  *            PIPE_ID - ME0 0=3D
7396  *                    - ME1&2 compute dispatcher (4 pipes each)
7397  *            SDMA:
7398  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7399  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7400  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7401  * [79:72]  - VMID
7402  * [95:80]  - PASID
7403  * [127:96] - reserved
7404  */
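/*
 * Worked decode of one (made-up) IV entry, following the layout above:
 * given the dwords { 0x000000b5, 0x00000000, 0x00000028, 0x00000000 },
 * cik_irq_process() extracts
 *
 *   src_id   = 0x000000b5 & 0xff      = 181 (CP EOP event)
 *   src_data = 0x00000000 & 0xfffffff = 0
 *   ring_id  = 0x00000028 & 0xff      = 0x28
 *
 * and the CP EOP case splits ring_id into ME_ID[6:5] = 1,
 * PIPE_ID[4:3] = 1, QUEUE_ID[2:0] = 0, i.e. MEC1 pipe 1 queue 0.
 */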
7405 /**
7406  * cik_irq_process - interrupt handler
7407  *
7408  * @rdev: radeon_device pointer
7409  *
7410  * Interrupt handler (CIK).  Walk the IH ring,
7411  * ack interrupts and schedule work to handle
7412  * interrupt events.
7413  * Returns irq process return code.
7414  */
7415 int cik_irq_process(struct radeon_device *rdev)
7416 {
7417 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7418 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7419 	u32 wptr;
7420 	u32 rptr;
7421 	u32 src_id, src_data, ring_id;
7422 	u8 me_id, pipe_id, queue_id;
7423 	u32 ring_index;
7424 	bool queue_hotplug = false;
7425 	bool queue_reset = false;
7426 	u32 addr, status, mc_client;
7427 	bool queue_thermal = false;
7428 
7429 	if (!rdev->ih.enabled || rdev->shutdown)
7430 		return IRQ_NONE;
7431 
7432 	wptr = cik_get_ih_wptr(rdev);
7433 
7434 restart_ih:
7435 	/* is somebody else already processing irqs? */
7436 	if (atomic_xchg(&rdev->ih.lock, 1))
7437 		return IRQ_NONE;
7438 
7439 	rptr = rdev->ih.rptr;
7440 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7441 
7442 	/* Order reading of wptr vs. reading of IH ring data */
7443 	rmb();
7444 
7445 	/* display interrupts */
7446 	cik_irq_ack(rdev);
7447 
7448 	while (rptr != wptr) {
7449 		/* wptr/rptr are in bytes! */
7450 		ring_index = rptr / 4;
7451 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7452 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7453 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7454 
7455 		switch (src_id) {
7456 		case 1: /* D1 vblank/vline */
7457 			switch (src_data) {
7458 			case 0: /* D1 vblank */
7459 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7460 					if (rdev->irq.crtc_vblank_int[0]) {
7461 						drm_handle_vblank(rdev->ddev, 0);
7462 						rdev->pm.vblank_sync = true;
7463 						wake_up(&rdev->irq.vblank_queue);
7464 					}
7465 					if (atomic_read(&rdev->irq.pflip[0]))
7466 						radeon_crtc_handle_vblank(rdev, 0);
7467 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7468 					DRM_DEBUG("IH: D1 vblank\n");
7469 				}
7470 				break;
7471 			case 1: /* D1 vline */
7472 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7473 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7474 					DRM_DEBUG("IH: D1 vline\n");
7475 				}
7476 				break;
7477 			default:
7478 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7479 				break;
7480 			}
7481 			break;
7482 		case 2: /* D2 vblank/vline */
7483 			switch (src_data) {
7484 			case 0: /* D2 vblank */
7485 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7486 					if (rdev->irq.crtc_vblank_int[1]) {
7487 						drm_handle_vblank(rdev->ddev, 1);
7488 						rdev->pm.vblank_sync = true;
7489 						wake_up(&rdev->irq.vblank_queue);
7490 					}
7491 					if (atomic_read(&rdev->irq.pflip[1]))
7492 						radeon_crtc_handle_vblank(rdev, 1);
7493 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7494 					DRM_DEBUG("IH: D2 vblank\n");
7495 				}
7496 				break;
7497 			case 1: /* D2 vline */
7498 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7499 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7500 					DRM_DEBUG("IH: D2 vline\n");
7501 				}
7502 				break;
7503 			default:
7504 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7505 				break;
7506 			}
7507 			break;
7508 		case 3: /* D3 vblank/vline */
7509 			switch (src_data) {
7510 			case 0: /* D3 vblank */
7511 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7512 					if (rdev->irq.crtc_vblank_int[2]) {
7513 						drm_handle_vblank(rdev->ddev, 2);
7514 						rdev->pm.vblank_sync = true;
7515 						wake_up(&rdev->irq.vblank_queue);
7516 					}
7517 					if (atomic_read(&rdev->irq.pflip[2]))
7518 						radeon_crtc_handle_vblank(rdev, 2);
7519 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7520 					DRM_DEBUG("IH: D3 vblank\n");
7521 				}
7522 				break;
7523 			case 1: /* D3 vline */
7524 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7525 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7526 					DRM_DEBUG("IH: D3 vline\n");
7527 				}
7528 				break;
7529 			default:
7530 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7531 				break;
7532 			}
7533 			break;
7534 		case 4: /* D4 vblank/vline */
7535 			switch (src_data) {
7536 			case 0: /* D4 vblank */
7537 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7538 					if (rdev->irq.crtc_vblank_int[3]) {
7539 						drm_handle_vblank(rdev->ddev, 3);
7540 						rdev->pm.vblank_sync = true;
7541 						wake_up(&rdev->irq.vblank_queue);
7542 					}
7543 					if (atomic_read(&rdev->irq.pflip[3]))
7544 						radeon_crtc_handle_vblank(rdev, 3);
7545 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7546 					DRM_DEBUG("IH: D4 vblank\n");
7547 				}
7548 				break;
7549 			case 1: /* D4 vline */
7550 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7551 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7552 					DRM_DEBUG("IH: D4 vline\n");
7553 				}
7554 				break;
7555 			default:
7556 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7557 				break;
7558 			}
7559 			break;
7560 		case 5: /* D5 vblank/vline */
7561 			switch (src_data) {
7562 			case 0: /* D5 vblank */
7563 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7564 					if (rdev->irq.crtc_vblank_int[4]) {
7565 						drm_handle_vblank(rdev->ddev, 4);
7566 						rdev->pm.vblank_sync = true;
7567 						wake_up(&rdev->irq.vblank_queue);
7568 					}
7569 					if (atomic_read(&rdev->irq.pflip[4]))
7570 						radeon_crtc_handle_vblank(rdev, 4);
7571 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7572 					DRM_DEBUG("IH: D5 vblank\n");
7573 				}
7574 				break;
7575 			case 1: /* D5 vline */
7576 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7577 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7578 					DRM_DEBUG("IH: D5 vline\n");
7579 				}
7580 				break;
7581 			default:
7582 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7583 				break;
7584 			}
7585 			break;
7586 		case 6: /* D6 vblank/vline */
7587 			switch (src_data) {
7588 			case 0: /* D6 vblank */
7589 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7590 					if (rdev->irq.crtc_vblank_int[5]) {
7591 						drm_handle_vblank(rdev->ddev, 5);
7592 						rdev->pm.vblank_sync = true;
7593 						wake_up(&rdev->irq.vblank_queue);
7594 					}
7595 					if (atomic_read(&rdev->irq.pflip[5]))
7596 						radeon_crtc_handle_vblank(rdev, 5);
7597 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7598 					DRM_DEBUG("IH: D6 vblank\n");
7599 				}
7600 				break;
7601 			case 1: /* D6 vline */
7602 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7603 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7604 					DRM_DEBUG("IH: D6 vline\n");
7605 				}
7606 				break;
7607 			default:
7608 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7609 				break;
7610 			}
7611 			break;
7612 		case 8: /* D1 page flip */
7613 		case 10: /* D2 page flip */
7614 		case 12: /* D3 page flip */
7615 		case 14: /* D4 page flip */
7616 		case 16: /* D5 page flip */
7617 		case 18: /* D6 page flip */
7618 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7619 			radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7620 			break;
7621 		case 42: /* HPD hotplug */
7622 			switch (src_data) {
7623 			case 0:
7624 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7625 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7626 					queue_hotplug = true;
7627 					DRM_DEBUG("IH: HPD1\n");
7628 				}
7629 				break;
7630 			case 1:
7631 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7632 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7633 					queue_hotplug = true;
7634 					DRM_DEBUG("IH: HPD2\n");
7635 				}
7636 				break;
7637 			case 2:
7638 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7639 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7640 					queue_hotplug = true;
7641 					DRM_DEBUG("IH: HPD3\n");
7642 				}
7643 				break;
7644 			case 3:
7645 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7646 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7647 					queue_hotplug = true;
7648 					DRM_DEBUG("IH: HPD4\n");
7649 				}
7650 				break;
7651 			case 4:
7652 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7653 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7654 					queue_hotplug = true;
7655 					DRM_DEBUG("IH: HPD5\n");
7656 				}
7657 				break;
7658 			case 5:
7659 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7660 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7661 					queue_hotplug = true;
7662 					DRM_DEBUG("IH: HPD6\n");
7663 				}
7664 				break;
7665 			default:
7666 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7667 				break;
7668 			}
7669 			break;
7670 		case 124: /* UVD */
7671 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7672 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7673 			break;
7674 		case 146:
7675 		case 147:
7676 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7677 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7678 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7679 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7680 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7681 				addr);
7682 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7683 				status);
7684 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7685 			/* reset addr and status */
7686 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7687 			break;
7688 		case 167: /* VCE */
7689 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7690 			switch (src_data) {
7691 			case 0:
7692 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7693 				break;
7694 			case 1:
7695 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7696 				break;
7697 			default:
7698 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7699 				break;
7700 			}
7701 			break;
7702 		case 176: /* GFX RB CP_INT */
7703 		case 177: /* GFX IB CP_INT */
7704 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7705 			break;
7706 		case 181: /* CP EOP event */
7707 			DRM_DEBUG("IH: CP EOP\n");
7708 			/* XXX check the bitfield order! */
7709 			me_id = (ring_id & 0x60) >> 5;
7710 			pipe_id = (ring_id & 0x18) >> 3;
7711 			queue_id = (ring_id & 0x7) >> 0;
7712 			switch (me_id) {
7713 			case 0:
7714 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7715 				break;
7716 			case 1:
7717 			case 2:
7718 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7719 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7720 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7721 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7722 				break;
7723 			}
7724 			break;
7725 		case 184: /* CP Privileged reg access */
7726 			DRM_ERROR("Illegal register access in command stream\n");
7727 			/* XXX check the bitfield order! */
7728 			me_id = (ring_id & 0x60) >> 5;
7729 			pipe_id = (ring_id & 0x18) >> 3;
7730 			queue_id = (ring_id & 0x7) >> 0;
7731 			switch (me_id) {
7732 			case 0:
7733 				/* This results in a full GPU reset, but all we need to do is soft
7734 				 * reset the CP for gfx
7735 				 */
7736 				queue_reset = true;
7737 				break;
7738 			case 1:
7739 				/* XXX compute */
7740 				queue_reset = true;
7741 				break;
7742 			case 2:
7743 				/* XXX compute */
7744 				queue_reset = true;
7745 				break;
7746 			}
7747 			break;
7748 		case 185: /* CP Privileged inst */
7749 			DRM_ERROR("Illegal instruction in command stream\n");
7750 			/* XXX check the bitfield order! */
7751 			me_id = (ring_id & 0x60) >> 5;
7752 			pipe_id = (ring_id & 0x18) >> 3;
7753 			queue_id = (ring_id & 0x7) >> 0;
7754 			switch (me_id) {
7755 			case 0:
7756 				/* This results in a full GPU reset, but all we need to do is soft
7757 				 * reset the CP for gfx
7758 				 */
7759 				queue_reset = true;
7760 				break;
7761 			case 1:
7762 				/* XXX compute */
7763 				queue_reset = true;
7764 				break;
7765 			case 2:
7766 				/* XXX compute */
7767 				queue_reset = true;
7768 				break;
7769 			}
7770 			break;
7771 		case 224: /* SDMA trap event */
7772 			/* XXX check the bitfield order! */
7773 			me_id = (ring_id & 0x3) >> 0;
7774 			queue_id = (ring_id & 0xc) >> 2;
7775 			DRM_DEBUG("IH: SDMA trap\n");
7776 			switch (me_id) {
7777 			case 0:
7778 				switch (queue_id) {
7779 				case 0:
7780 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7781 					break;
7782 				case 1:
7783 					/* XXX compute */
7784 					break;
7785 				case 2:
7786 					/* XXX compute */
7787 					break;
7788 				}
7789 				break;
7790 			case 1:
7791 				switch (queue_id) {
7792 				case 0:
7793 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7794 					break;
7795 				case 1:
7796 					/* XXX compute */
7797 					break;
7798 				case 2:
7799 					/* XXX compute */
7800 					break;
7801 				}
7802 				break;
7803 			}
7804 			break;
7805 		case 230: /* thermal low to high */
7806 			DRM_DEBUG("IH: thermal low to high\n");
7807 			rdev->pm.dpm.thermal.high_to_low = false;
7808 			queue_thermal = true;
7809 			break;
7810 		case 231: /* thermal high to low */
7811 			DRM_DEBUG("IH: thermal high to low\n");
7812 			rdev->pm.dpm.thermal.high_to_low = true;
7813 			queue_thermal = true;
7814 			break;
7815 		case 233: /* GUI IDLE */
7816 			DRM_DEBUG("IH: GUI idle\n");
7817 			break;
7818 		case 241: /* SDMA Privileged inst */
7819 		case 247: /* SDMA Privileged inst */
7820 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7821 			/* XXX check the bitfield order! */
7822 			me_id = (ring_id & 0x3) >> 0;
7823 			queue_id = (ring_id & 0xc) >> 2;
7824 			switch (me_id) {
7825 			case 0:
7826 				switch (queue_id) {
7827 				case 0:
7828 					queue_reset = true;
7829 					break;
7830 				case 1:
7831 					/* XXX compute */
7832 					queue_reset = true;
7833 					break;
7834 				case 2:
7835 					/* XXX compute */
7836 					queue_reset = true;
7837 					break;
7838 				}
7839 				break;
7840 			case 1:
7841 				switch (queue_id) {
7842 				case 0:
7843 					queue_reset = true;
7844 					break;
7845 				case 1:
7846 					/* XXX compute */
7847 					queue_reset = true;
7848 					break;
7849 				case 2:
7850 					/* XXX compute */
7851 					queue_reset = true;
7852 					break;
7853 				}
7854 				break;
7855 			}
7856 			break;
7857 		default:
7858 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7859 			break;
7860 		}
7861 
7862 		/* wptr/rptr are in bytes! */
7863 		rptr += 16;
7864 		rptr &= rdev->ih.ptr_mask;
7865 	}
7866 	if (queue_hotplug)
7867 		schedule_work(&rdev->hotplug_work);
7868 	if (queue_reset)
7869 		schedule_work(&rdev->reset_work);
7870 	if (queue_thermal)
7871 		schedule_work(&rdev->pm.dpm.thermal.work);
7872 	rdev->ih.rptr = rptr;
7873 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7874 	atomic_set(&rdev->ih.lock, 0);
7875 
7876 	/* make sure wptr hasn't changed while processing */
7877 	wptr = cik_get_ih_wptr(rdev);
7878 	if (wptr != rptr)
7879 		goto restart_ih;
7880 
7881 	return IRQ_HANDLED;
7882 }
7883 
7884 /*
7885  * startup/shutdown callbacks
7886  */
7887 /**
7888  * cik_startup - program the asic to a functional state
7889  *
7890  * @rdev: radeon_device pointer
7891  *
7892  * Programs the asic to a functional state (CIK).
7893  * Called by cik_init() and cik_resume().
7894  * Returns 0 for success, error for failure.
7895  */
7896 static int cik_startup(struct radeon_device *rdev)
7897 {
7898 	struct radeon_ring *ring;
7899 	int r;
7900 
7901 	/* enable pcie gen2/3 link */
7902 	cik_pcie_gen3_enable(rdev);
7903 	/* enable aspm */
7904 	cik_program_aspm(rdev);
7905 
7906 	/* scratch needs to be initialized before MC */
7907 	r = r600_vram_scratch_init(rdev);
7908 	if (r)
7909 		return r;
7910 
7911 	cik_mc_program(rdev);
7912 
7913 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7914 		r = ci_mc_load_microcode(rdev);
7915 		if (r) {
7916 			DRM_ERROR("Failed to load MC firmware!\n");
7917 			return r;
7918 		}
7919 	}
7920 
7921 	r = cik_pcie_gart_enable(rdev);
7922 	if (r)
7923 		return r;
7924 	cik_gpu_init(rdev);
7925 
7926 	/* allocate rlc buffers */
7927 	if (rdev->flags & RADEON_IS_IGP) {
7928 		if (rdev->family == CHIP_KAVERI) {
7929 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7930 			rdev->rlc.reg_list_size =
7931 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7932 		} else {
7933 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7934 			rdev->rlc.reg_list_size =
7935 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7936 		}
7937 	}
7938 	rdev->rlc.cs_data = ci_cs_data;
7939 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7940 	r = sumo_rlc_init(rdev);
7941 	if (r) {
7942 		DRM_ERROR("Failed to init rlc BOs!\n");
7943 		return r;
7944 	}
7945 
7946 	/* allocate wb buffer */
7947 	r = radeon_wb_init(rdev);
7948 	if (r)
7949 		return r;
7950 
7951 	/* allocate mec buffers */
7952 	r = cik_mec_init(rdev);
7953 	if (r) {
7954 		DRM_ERROR("Failed to init MEC BOs!\n");
7955 		return r;
7956 	}
7957 
7958 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7959 	if (r) {
7960 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7961 		return r;
7962 	}
7963 
7964 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7965 	if (r) {
7966 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7967 		return r;
7968 	}
7969 
7970 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7971 	if (r) {
7972 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7973 		return r;
7974 	}
7975 
7976 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7977 	if (r) {
7978 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7979 		return r;
7980 	}
7981 
7982 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7983 	if (r) {
7984 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7985 		return r;
7986 	}
7987 
7988 	r = radeon_uvd_resume(rdev);
7989 	if (!r) {
7990 		r = uvd_v4_2_resume(rdev);
7991 		if (!r) {
7992 			r = radeon_fence_driver_start_ring(rdev,
7993 							   R600_RING_TYPE_UVD_INDEX);
7994 			if (r)
7995 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7996 		}
7997 	}
7998 	if (r)
7999 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8000 
8001 	r = radeon_vce_resume(rdev);
8002 	if (!r) {
8003 		r = vce_v2_0_resume(rdev);
8004 		if (!r)
8005 			r = radeon_fence_driver_start_ring(rdev,
8006 							   TN_RING_TYPE_VCE1_INDEX);
8007 		if (!r)
8008 			r = radeon_fence_driver_start_ring(rdev,
8009 							   TN_RING_TYPE_VCE2_INDEX);
8010 	}
8011 	if (r) {
8012 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8013 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8014 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8015 	}
8016 
8017 	/* Enable IRQ */
8018 	if (!rdev->irq.installed) {
8019 		r = radeon_irq_kms_init(rdev);
8020 		if (r)
8021 			return r;
8022 	}
8023 
8024 	r = cik_irq_init(rdev);
8025 	if (r) {
8026 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8027 		radeon_irq_kms_fini(rdev);
8028 		return r;
8029 	}
8030 	cik_irq_set(rdev);
8031 
8032 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8033 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8034 			     PACKET3(PACKET3_NOP, 0x3FFF));
8035 	if (r)
8036 		return r;
8037 
8038 	/* set up the compute queues */
8039 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8040 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8041 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8042 			     PACKET3(PACKET3_NOP, 0x3FFF));
8043 	if (r)
8044 		return r;
8045 	ring->me = 1; /* first MEC */
8046 	ring->pipe = 0; /* first pipe */
8047 	ring->queue = 0; /* first queue */
8048 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8049 
8050 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8051 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8052 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8053 			     PACKET3(PACKET3_NOP, 0x3FFF));
8054 	if (r)
8055 		return r;
8056 	/* dGPUs only have 1 MEC */
8057 	ring->me = 1; /* first MEC */
8058 	ring->pipe = 0; /* first pipe */
8059 	ring->queue = 1; /* second queue */
8060 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8061 
8062 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8063 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8064 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8065 	if (r)
8066 		return r;
8067 
8068 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8069 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8070 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8071 	if (r)
8072 		return r;
8073 
8074 	r = cik_cp_resume(rdev);
8075 	if (r)
8076 		return r;
8077 
8078 	r = cik_sdma_resume(rdev);
8079 	if (r)
8080 		return r;
8081 
8082 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8083 	if (ring->ring_size) {
8084 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8085 				     RADEON_CP_PACKET2);
8086 		if (!r)
8087 			r = uvd_v1_0_init(rdev);
8088 		if (r)
8089 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8090 	}
8091 
8092 	r = -ENOENT;
8093 
8094 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8095 	if (ring->ring_size)
8096 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8097 				     VCE_CMD_NO_OP);
8098 
8099 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8100 	if (ring->ring_size)
8101 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8102 				     VCE_CMD_NO_OP);
8103 
8104 	if (!r)
8105 		r = vce_v1_0_init(rdev);
8106 	else if (r != -ENOENT)
8107 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8108 
8109 	r = radeon_ib_pool_init(rdev);
8110 	if (r) {
8111 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8112 		return r;
8113 	}
8114 
8115 	r = radeon_vm_manager_init(rdev);
8116 	if (r) {
8117 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8118 		return r;
8119 	}
8120 
8121 	r = dce6_audio_init(rdev);
8122 	if (r)
8123 		return r;
8124 
8125 	return 0;
8126 }
8127 
8128 /**
8129  * cik_resume - resume the asic to a functional state
8130  *
8131  * @rdev: radeon_device pointer
8132  *
8133  * Programs the asic to a functional state (CIK).
8134  * Called at resume.
8135  * Returns 0 for success, error for failure.
8136  */
8137 int cik_resume(struct radeon_device *rdev)
8138 {
8139 	int r;
8140 
8141 	/* post card */
8142 	atom_asic_init(rdev->mode_info.atom_context);
8143 
8144 	/* init golden registers */
8145 	cik_init_golden_registers(rdev);
8146 
8147 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8148 		radeon_pm_resume(rdev);
8149 
8150 	rdev->accel_working = true;
8151 	r = cik_startup(rdev);
8152 	if (r) {
8153 		DRM_ERROR("cik startup failed on resume\n");
8154 		rdev->accel_working = false;
8155 		return r;
8156 	}
8157 
8158 	return r;
8159 
8160 }
8161 
8162 /**
8163  * cik_suspend - suspend the asic
8164  *
8165  * @rdev: radeon_device pointer
8166  *
8167  * Bring the chip into a state suitable for suspend (CIK).
8168  * Called at suspend.
8169  * Returns 0 for success.
8170  */
8171 int cik_suspend(struct radeon_device *rdev)
8172 {
8173 	radeon_pm_suspend(rdev);
8174 	dce6_audio_fini(rdev);
8175 	radeon_vm_manager_fini(rdev);
8176 	cik_cp_enable(rdev, false);
8177 	cik_sdma_enable(rdev, false);
8178 	uvd_v1_0_fini(rdev);
8179 	radeon_uvd_suspend(rdev);
8180 	radeon_vce_suspend(rdev);
8181 	cik_fini_pg(rdev);
8182 	cik_fini_cg(rdev);
8183 	cik_irq_suspend(rdev);
8184 	radeon_wb_disable(rdev);
8185 	cik_pcie_gart_disable(rdev);
8186 	return 0;
8187 }
8188 
8189 /* The plan is to move initialization into this function and use
8190  * helper functions so that radeon_device_init does pretty much
8191  * nothing more than call asic-specific functions. This should
8192  * also allow us to remove a bunch of callback functions
8193  * like vram_info.
8194  */
8195 /**
8196  * cik_init - asic specific driver and hw init
8197  *
8198  * @rdev: radeon_device pointer
8199  *
8200  * Setup asic specific driver variables and program the hw
8201  * to a functional state (CIK).
8202  * Called at driver startup.
8203  * Returns 0 for success, errors for failure.
8204  */
8205 int cik_init(struct radeon_device *rdev)
8206 {
8207 	struct radeon_ring *ring;
8208 	int r;
8209 
8210 	/* Read BIOS */
8211 	if (!radeon_get_bios(rdev)) {
8212 		if (ASIC_IS_AVIVO(rdev))
8213 			return -EINVAL;
8214 	}
8215 	/* Must be an ATOMBIOS */
8216 	if (!rdev->is_atom_bios) {
8217 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8218 		return -EINVAL;
8219 	}
8220 	r = radeon_atombios_init(rdev);
8221 	if (r)
8222 		return r;
8223 
8224 	/* Post card if necessary */
8225 	if (!radeon_card_posted(rdev)) {
8226 		if (!rdev->bios) {
8227 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8228 			return -EINVAL;
8229 		}
8230 		DRM_INFO("GPU not posted. posting now...\n");
8231 		atom_asic_init(rdev->mode_info.atom_context);
8232 	}
8233 	/* init golden registers */
8234 	cik_init_golden_registers(rdev);
8235 	/* Initialize scratch registers */
8236 	cik_scratch_init(rdev);
8237 	/* Initialize surface registers */
8238 	radeon_surface_init(rdev);
8239 	/* Initialize clocks */
8240 	radeon_get_clock_info(rdev->ddev);
8241 
8242 	/* Fence driver */
8243 	r = radeon_fence_driver_init(rdev);
8244 	if (r)
8245 		return r;
8246 
8247 	/* initialize memory controller */
8248 	r = cik_mc_init(rdev);
8249 	if (r)
8250 		return r;
8251 	/* Memory manager */
8252 	r = radeon_bo_init(rdev);
8253 	if (r)
8254 		return r;
8255 
8256 	if (rdev->flags & RADEON_IS_IGP) {
8257 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8258 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8259 			r = cik_init_microcode(rdev);
8260 			if (r) {
8261 				DRM_ERROR("Failed to load firmware!\n");
8262 				return r;
8263 			}
8264 		}
8265 	} else {
8266 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8267 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8268 		    !rdev->mc_fw) {
8269 			r = cik_init_microcode(rdev);
8270 			if (r) {
8271 				DRM_ERROR("Failed to load firmware!\n");
8272 				return r;
8273 			}
8274 		}
8275 	}
8276 
8277 	/* Initialize power management */
8278 	radeon_pm_init(rdev);
8279 
8280 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8281 	ring->ring_obj = NULL;
8282 	r600_ring_init(rdev, ring, 1024 * 1024);
8283 
8284 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8285 	ring->ring_obj = NULL;
8286 	r600_ring_init(rdev, ring, 1024 * 1024);
8287 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8288 	if (r)
8289 		return r;
8290 
8291 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8292 	ring->ring_obj = NULL;
8293 	r600_ring_init(rdev, ring, 1024 * 1024);
8294 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8295 	if (r)
8296 		return r;
8297 
8298 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8299 	ring->ring_obj = NULL;
8300 	r600_ring_init(rdev, ring, 256 * 1024);
8301 
8302 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8303 	ring->ring_obj = NULL;
8304 	r600_ring_init(rdev, ring, 256 * 1024);
8305 
8306 	r = radeon_uvd_init(rdev);
8307 	if (!r) {
8308 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8309 		ring->ring_obj = NULL;
8310 		r600_ring_init(rdev, ring, 4096);
8311 	}
8312 
8313 	r = radeon_vce_init(rdev);
8314 	if (!r) {
8315 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8316 		ring->ring_obj = NULL;
8317 		r600_ring_init(rdev, ring, 4096);
8318 
8319 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8320 		ring->ring_obj = NULL;
8321 		r600_ring_init(rdev, ring, 4096);
8322 	}
8323 
8324 	rdev->ih.ring_obj = NULL;
8325 	r600_ih_ring_init(rdev, 64 * 1024);
8326 
8327 	r = r600_pcie_gart_init(rdev);
8328 	if (r)
8329 		return r;
8330 
8331 	rdev->accel_working = true;
8332 	r = cik_startup(rdev);
8333 	if (r) {
8334 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8335 		cik_cp_fini(rdev);
8336 		cik_sdma_fini(rdev);
8337 		cik_irq_fini(rdev);
8338 		sumo_rlc_fini(rdev);
8339 		cik_mec_fini(rdev);
8340 		radeon_wb_fini(rdev);
8341 		radeon_ib_pool_fini(rdev);
8342 		radeon_vm_manager_fini(rdev);
8343 		radeon_irq_kms_fini(rdev);
8344 		cik_pcie_gart_fini(rdev);
8345 		rdev->accel_working = false;
8346 	}
8347 
8348 	/* Don't start up if the MC ucode is missing.
8349 	 * The default clocks and voltages before the MC ucode
8350 	 * is loaded are not sufficient for advanced operations.
8351 	 */
8352 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8353 		DRM_ERROR("radeon: MC ucode required for CIK.\n");
8354 		return -EINVAL;
8355 	}
8356 
8357 	return 0;
8358 }
8359 
8360 /**
8361  * cik_fini - asic specific driver and hw fini
8362  *
8363  * @rdev: radeon_device pointer
8364  *
8365  * Tear down the asic specific driver variables and program the hw
8366  * to an idle state (CIK).
8367  * Called at driver unload.
8368  */
8369 void cik_fini(struct radeon_device *rdev)
8370 {
8371 	radeon_pm_fini(rdev);
8372 	cik_cp_fini(rdev);
8373 	cik_sdma_fini(rdev);
8374 	cik_fini_pg(rdev);
8375 	cik_fini_cg(rdev);
8376 	cik_irq_fini(rdev);
8377 	sumo_rlc_fini(rdev);
8378 	cik_mec_fini(rdev);
8379 	radeon_wb_fini(rdev);
8380 	radeon_vm_manager_fini(rdev);
8381 	radeon_ib_pool_fini(rdev);
8382 	radeon_irq_kms_fini(rdev);
8383 	uvd_v1_0_fini(rdev);
8384 	radeon_uvd_fini(rdev);
8385 	radeon_vce_fini(rdev);
8386 	cik_pcie_gart_fini(rdev);
8387 	r600_vram_scratch_fini(rdev);
8388 	radeon_gem_fini(rdev);
8389 	radeon_fence_driver_fini(rdev);
8390 	radeon_bo_fini(rdev);
8391 	radeon_atombios_fini(rdev);
8392 	kfree(rdev->bios);
8393 	rdev->bios = NULL;
8394 }
8395 
8396 void dce8_program_fmt(struct drm_encoder *encoder)
8397 {
8398 	struct drm_device *dev = encoder->dev;
8399 	struct radeon_device *rdev = dev->dev_private;
8400 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8401 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8402 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8403 	int bpc = 0;
8404 	u32 tmp = 0;
8405 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8406 
8407 	if (connector) {
8408 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8409 		bpc = radeon_get_monitor_bpc(connector);
8410 		dither = radeon_connector->dither;
8411 	}
8412 
8413 	/* LVDS/eDP FMT is set up by atom */
8414 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8415 		return;
8416 
8417 	/* not needed for analog */
8418 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8419 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8420 		return;
8421 
8422 	if (bpc == 0)
8423 		return;
8424 
8425 	switch (bpc) {
8426 	case 6:
8427 		if (dither == RADEON_FMT_DITHER_ENABLE)
8428 			/* XXX sort out optimal dither settings */
8429 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8430 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8431 		else
8432 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8433 		break;
8434 	case 8:
8435 		if (dither == RADEON_FMT_DITHER_ENABLE)
8436 			/* XXX sort out optimal dither settings */
8437 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8438 				FMT_RGB_RANDOM_ENABLE |
8439 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8440 		else
8441 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8442 		break;
8443 	case 10:
8444 		if (dither == RADEON_FMT_DITHER_ENABLE)
8445 			/* XXX sort out optimal dither settings */
8446 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8447 				FMT_RGB_RANDOM_ENABLE |
8448 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8449 		else
8450 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8451 		break;
8452 	default:
8453 		/* not needed */
8454 		break;
8455 	}
8456 
8457 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8458 }
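
/*
 * Example of the FMT programming above: an 8 bpc monitor with dithering
 * enabled gets the random dither enables plus FMT_SPATIAL_DITHER_EN |
 * FMT_SPATIAL_DITHER_DEPTH(1); with dithering disabled the pipe simply
 * truncates to 8 bits via FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1).
 * Monitors deeper than 10 bpc fall through with tmp == 0 (no processing).
 */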
8459 
8460 /* display watermark setup */
8461 /**
8462  * dce8_line_buffer_adjust - Set up the line buffer
8463  *
8464  * @rdev: radeon_device pointer
8465  * @radeon_crtc: the selected display controller
8466  * @mode: the current display mode on the selected display
8467  * controller
8468  *
8469  * Set up the line buffer allocation for
8470  * the selected display controller (CIK).
8471  * Returns the line buffer size in pixels.
8472  */
8473 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8474 				   struct radeon_crtc *radeon_crtc,
8475 				   struct drm_display_mode *mode)
8476 {
8477 	u32 tmp, buffer_alloc, i;
8478 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8479 	/*
8480 	 * Line Buffer Setup
8481 	 * There are 6 line buffers, one for each display controller.
8482 	 * There are 3 partitions per LB. Select the number of partitions
8483 	 * to enable based on the display width.  For display widths larger
8484 	 * than 4096, you need to use 2 display controllers and combine
8485 	 * them using the stereo blender.
8486 	 */
8487 	if (radeon_crtc->base.enabled && mode) {
8488 		if (mode->crtc_hdisplay < 1920) {
8489 			tmp = 1;
8490 			buffer_alloc = 2;
8491 		} else if (mode->crtc_hdisplay < 2560) {
8492 			tmp = 2;
8493 			buffer_alloc = 2;
8494 		} else if (mode->crtc_hdisplay < 4096) {
8495 			tmp = 0;
8496 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8497 		} else {
8498 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8499 			tmp = 0;
8500 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8501 		}
8502 	} else {
8503 		tmp = 1;
8504 		buffer_alloc = 0;
8505 	}
8506 
8507 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8508 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8509 
8510 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8511 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8512 	for (i = 0; i < rdev->usec_timeout; i++) {
8513 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8514 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8515 			break;
8516 		udelay(1);
8517 	}
8518 
8519 	if (radeon_crtc->base.enabled && mode) {
8520 		switch (tmp) {
8521 		case 0:
8522 		default:
8523 			return 4096 * 2;
8524 		case 1:
8525 			return 1920 * 2;
8526 		case 2:
8527 			return 2560 * 2;
8528 		}
8529 	}
8530 
8531 	/* controller not enabled, so no lb used */
8532 	return 0;
8533 }
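
/*
 * Worked example (hypothetical modes): a 1280x720 mode takes the
 * hdisplay < 1920 branch, so LB_MEMORY_CONFIG(1) is programmed and
 * 1920 * 2 = 3840 pixels are reported to the watermark code; a
 * 1920x1080 mode falls through to the < 2560 branch and gets
 * 2560 * 2 = 5120 pixels.
 */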
8534 
8535 /**
8536  * cik_get_number_of_dram_channels - get the number of dram channels
8537  *
8538  * @rdev: radeon_device pointer
8539  *
8540  * Look up the number of video ram channels (CIK).
8541  * Used for display watermark bandwidth calculations
8542  * Returns the number of dram channels
8543  */
8544 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8545 {
8546 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8547 
8548 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8549 	case 0:
8550 	default:
8551 		return 1;
8552 	case 1:
8553 		return 2;
8554 	case 2:
8555 		return 4;
8556 	case 3:
8557 		return 8;
8558 	case 4:
8559 		return 3;
8560 	case 5:
8561 		return 6;
8562 	case 6:
8563 		return 10;
8564 	case 7:
8565 		return 12;
8566 	case 8:
8567 		return 16;
8568 	}
8569 }
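
/*
 * Example decode (hypothetical register value): NOOFCHAN = 3 in
 * MC_SHARED_CHMAP reports 8 dram channels; assuming the 32-bit
 * channels implied by the "* 4" bytes-per-channel factor used in
 * the bandwidth code below, that is a 256-bit memory interface.
 */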
8570 
8571 struct dce8_wm_params {
8572 	u32 dram_channels; /* number of dram channels */
8573 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8574 	u32 sclk;          /* engine clock in kHz */
8575 	u32 disp_clk;      /* display clock in kHz */
8576 	u32 src_width;     /* viewport width */
8577 	u32 active_time;   /* active display time in ns */
8578 	u32 blank_time;    /* blank time in ns */
8579 	bool interlaced;   /* mode is interlaced */
8580 	fixed20_12 vsc;    /* vertical scale ratio */
8581 	u32 num_heads;     /* number of active crtcs */
8582 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8583 	u32 lb_size;       /* line buffer allocated to pipe */
8584 	u32 vtaps;         /* vertical scaler taps */
8585 };
8586 
8587 /**
8588  * dce8_dram_bandwidth - get the dram bandwidth
8589  *
8590  * @wm: watermark calculation data
8591  *
8592  * Calculate the raw dram bandwidth (CIK).
8593  * Used for display watermark bandwidth calculations
8594  * Returns the dram bandwidth in MBytes/s
8595  */
8596 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8597 {
8598 	/* Calculate raw DRAM Bandwidth */
8599 	fixed20_12 dram_efficiency; /* 0.7 */
8600 	fixed20_12 yclk, dram_channels, bandwidth;
8601 	fixed20_12 a;
8602 
8603 	a.full = dfixed_const(1000);
8604 	yclk.full = dfixed_const(wm->yclk);
8605 	yclk.full = dfixed_div(yclk, a);
8606 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8607 	a.full = dfixed_const(10);
8608 	dram_efficiency.full = dfixed_const(7);
8609 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8610 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8611 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8612 
8613 	return dfixed_trunc(bandwidth);
8614 }
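
/*
 * Worked example (hypothetical clocks): the fixed-point math above
 * reduces to bandwidth = yclk(MHz) * dram_channels * 4 * 0.7, so a
 * 2-channel board with yclk = 1000000 kHz (1 GHz) yields
 * 1000 * 8 * 0.7 = 5600 MBytes/s of raw dram bandwidth.
 */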
8615 
8616 /**
8617  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8618  *
8619  * @wm: watermark calculation data
8620  *
8621  * Calculate the dram bandwidth used for display (CIK).
8622  * Used for display watermark bandwidth calculations
8623  * Returns the dram bandwidth for display in MBytes/s
8624  */
8625 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8626 {
8627 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8628 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8629 	fixed20_12 yclk, dram_channels, bandwidth;
8630 	fixed20_12 a;
8631 
8632 	a.full = dfixed_const(1000);
8633 	yclk.full = dfixed_const(wm->yclk);
8634 	yclk.full = dfixed_div(yclk, a);
8635 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8636 	a.full = dfixed_const(10);
8637 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8638 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8639 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8640 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8641 
8642 	return dfixed_trunc(bandwidth);
8643 }
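
/*
 * Same computation as dce8_dram_bandwidth() with the 0.7 efficiency
 * replaced by the 0.3 worst-case display allocation: the hypothetical
 * 2-channel, 1 GHz example above would leave
 * 1000 * 8 * 0.3 = 2400 MBytes/s for display.
 */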
8644 
8645 /**
8646  * dce8_data_return_bandwidth - get the data return bandwidth
8647  *
8648  * @wm: watermark calculation data
8649  *
8650  * Calculate the data return bandwidth used for display (CIK).
8651  * Used for display watermark bandwidth calculations
8652  * Returns the data return bandwidth in MBytes/s
8653  */
8654 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8655 {
8656 	/* Calculate the display Data return Bandwidth */
8657 	fixed20_12 return_efficiency; /* 0.8 */
8658 	fixed20_12 sclk, bandwidth;
8659 	fixed20_12 a;
8660 
8661 	a.full = dfixed_const(1000);
8662 	sclk.full = dfixed_const(wm->sclk);
8663 	sclk.full = dfixed_div(sclk, a);
8664 	a.full = dfixed_const(10);
8665 	return_efficiency.full = dfixed_const(8);
8666 	return_efficiency.full = dfixed_div(return_efficiency, a);
8667 	a.full = dfixed_const(32);
8668 	bandwidth.full = dfixed_mul(a, sclk);
8669 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8670 
8671 	return dfixed_trunc(bandwidth);
8672 }
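
/*
 * Worked example (hypothetical engine clock): the formula is
 * bandwidth = sclk(MHz) * 32 * 0.8, so sclk = 800000 kHz (800 MHz)
 * returns 800 * 32 * 0.8 = 20480 MBytes/s.
 */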
8673 
8674 /**
8675  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8676  *
8677  * @wm: watermark calculation data
8678  *
8679  * Calculate the dmif bandwidth used for display (CIK).
8680  * Used for display watermark bandwidth calculations
8681  * Returns the dmif bandwidth in MBytes/s
8682  */
8683 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8684 {
8685 	/* Calculate the DMIF Request Bandwidth */
8686 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8687 	fixed20_12 disp_clk, bandwidth;
8688 	fixed20_12 a, b;
8689 
8690 	a.full = dfixed_const(1000);
8691 	disp_clk.full = dfixed_const(wm->disp_clk);
8692 	disp_clk.full = dfixed_div(disp_clk, a);
8693 	a.full = dfixed_const(32);
8694 	b.full = dfixed_mul(a, disp_clk);
8695 
8696 	a.full = dfixed_const(10);
8697 	disp_clk_request_efficiency.full = dfixed_const(8);
8698 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8699 
8700 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8701 
8702 	return dfixed_trunc(bandwidth);
8703 }
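
/*
 * Same shape as dce8_data_return_bandwidth() but keyed to the display
 * clock: a hypothetical 300 MHz disp_clk gives
 * 300 * 32 * 0.8 = 7680 MBytes/s of DMIF request bandwidth.
 */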
8704 
8705 /**
8706  * dce8_available_bandwidth - get the min available bandwidth
8707  *
8708  * @wm: watermark calculation data
8709  *
8710  * Calculate the min available bandwidth used for display (CIK).
8711  * Used for display watermark bandwidth calculations
8712  * Returns the min available bandwidth in MBytes/s
8713  */
8714 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8715 {
8716 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8717 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8718 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8719 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8720 
8721 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8722 }
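
/*
 * Continuing the hypothetical numbers above (5600, 20480 and
 * 7680 MBytes/s), the available bandwidth would be
 * min(5600, min(20480, 7680)) = 5600 MBytes/s, i.e. dram is the
 * bottleneck in that configuration.
 */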
8723 
8724 /**
8725  * dce8_average_bandwidth - get the average available bandwidth
8726  *
8727  * @wm: watermark calculation data
8728  *
8729  * Calculate the average available bandwidth used for display (CIK).
8730  * Used for display watermark bandwidth calculations
8731  * Returns the average available bandwidth in MBytes/s
8732  */
8733 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8734 {
8735 	/* Calculate the display mode Average Bandwidth
8736 	 * DisplayMode should contain the source and destination dimensions,
8737 	 * timing, etc.
8738 	 */
8739 	fixed20_12 bpp;
8740 	fixed20_12 line_time;
8741 	fixed20_12 src_width;
8742 	fixed20_12 bandwidth;
8743 	fixed20_12 a;
8744 
8745 	a.full = dfixed_const(1000);
8746 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8747 	line_time.full = dfixed_div(line_time, a);
8748 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8749 	src_width.full = dfixed_const(wm->src_width);
8750 	bandwidth.full = dfixed_mul(src_width, bpp);
8751 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8752 	bandwidth.full = dfixed_div(bandwidth, line_time);
8753 
8754 	return dfixed_trunc(bandwidth);
8755 }
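
/*
 * Worked example (hypothetical 1080p60 timing): with src_width = 1920,
 * 4 bytes per pixel, vsc = 1 and a line time of about 14815 ns
 * (active + blank), the average bandwidth is
 * 1920 * 4 / 14.815 us ~= 518 MBytes/s.
 */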
8756 
8757 /**
8758  * dce8_latency_watermark - get the latency watermark
8759  *
8760  * @wm: watermark calculation data
8761  *
8762  * Calculate the latency watermark (CIK).
8763  * Used for display watermark bandwidth calculations
8764  * Returns the latency watermark in ns
8765  */
8766 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8767 {
8768 	/* First calculate the latency in ns */
8769 	u32 mc_latency = 2000; /* 2000 ns. */
8770 	u32 available_bandwidth = dce8_available_bandwidth(wm);
8771 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8772 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8773 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8774 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8775 		(wm->num_heads * cursor_line_pair_return_time);
8776 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8777 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8778 	u32 tmp, dmif_size = 12288;
8779 	fixed20_12 a, b, c;
8780 
8781 	if (wm->num_heads == 0)
8782 		return 0;
8783 
8784 	a.full = dfixed_const(2);
8785 	b.full = dfixed_const(1);
8786 	if ((wm->vsc.full > a.full) ||
8787 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8788 	    (wm->vtaps >= 5) ||
8789 	    ((wm->vsc.full >= a.full) && wm->interlaced))
8790 		max_src_lines_per_dst_line = 4;
8791 	else
8792 		max_src_lines_per_dst_line = 2;
8793 
8794 	a.full = dfixed_const(available_bandwidth);
8795 	b.full = dfixed_const(wm->num_heads);
8796 	a.full = dfixed_div(a, b);
8797 
8798 	b.full = dfixed_const(mc_latency + 512);
8799 	c.full = dfixed_const(wm->disp_clk);
8800 	b.full = dfixed_div(b, c);
8801 
8802 	c.full = dfixed_const(dmif_size);
8803 	b.full = dfixed_div(c, b);
8804 
8805 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8806 
8807 	b.full = dfixed_const(1000);
8808 	c.full = dfixed_const(wm->disp_clk);
8809 	b.full = dfixed_div(c, b);
8810 	c.full = dfixed_const(wm->bytes_per_pixel);
8811 	b.full = dfixed_mul(b, c);
8812 
8813 	lb_fill_bw = min(tmp, dfixed_trunc(b));
8814 
8815 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8816 	b.full = dfixed_const(1000);
8817 	c.full = dfixed_const(lb_fill_bw);
8818 	b.full = dfixed_div(c, b);
8819 	a.full = dfixed_div(a, b);
8820 	line_fill_time = dfixed_trunc(a);
8821 
8822 	if (line_fill_time < wm->active_time)
8823 		return latency;
8824 	else
8825 		return latency + (line_fill_time - wm->active_time);
8827 }
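
/*
 * Worked example of the latency sum above (hypothetical single-head
 * setup): with available_bandwidth = 5600 MBytes/s,
 * worst_chunk_return_time = 512 * 8 * 1000 / 5600 ~= 731 ns,
 * cursor_line_pair_return_time = 128 * 4 * 1000 / 5600 ~= 91 ns and
 * a 300 MHz disp_clk (dc_latency ~= 133 ns), the base latency is
 * 2000 + (2 * 731 + 1 * 91) + 133 ~= 3686 ns before any line fill
 * time penalty is added.
 */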
8828 
8829 /**
8830  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8831  * average and available dram bandwidth
8832  *
8833  * @wm: watermark calculation data
8834  *
8835  * Check if the display average bandwidth fits in the display
8836  * dram bandwidth (CIK).
8837  * Used for display watermark bandwidth calculations
8838  * Returns true if the display fits, false if not.
8839  */
8840 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8841 {
8842 	if (dce8_average_bandwidth(wm) <=
8843 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8844 		return true;
8845 	else
8846 		return false;
8847 }
8848 
8849 /**
8850  * dce8_average_bandwidth_vs_available_bandwidth - check
8851  * average and available bandwidth
8852  *
8853  * @wm: watermark calculation data
8854  *
8855  * Check if the display average bandwidth fits in the display
8856  * available bandwidth (CIK).
8857  * Used for display watermark bandwidth calculations
8858  * Returns true if the display fits, false if not.
8859  */
8860 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8861 {
8862 	if (dce8_average_bandwidth(wm) <=
8863 	    (dce8_available_bandwidth(wm) / wm->num_heads))
8864 		return true;
8865 	else
8866 		return false;
8867 }
8868 
8869 /**
8870  * dce8_check_latency_hiding - check latency hiding
8871  *
8872  * @wm: watermark calculation data
8873  *
8874  * Check latency hiding (CIK).
8875  * Used for display watermark bandwidth calculations
8876  * Returns true if the display fits, false if not.
8877  */
8878 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8879 {
8880 	u32 lb_partitions = wm->lb_size / wm->src_width;
8881 	u32 line_time = wm->active_time + wm->blank_time;
8882 	u32 latency_tolerant_lines;
8883 	u32 latency_hiding;
8884 	fixed20_12 a;
8885 
8886 	a.full = dfixed_const(1);
8887 	if (wm->vsc.full > a.full)
8888 		latency_tolerant_lines = 1;
8889 	else {
8890 		if (lb_partitions <= (wm->vtaps + 1))
8891 			latency_tolerant_lines = 1;
8892 		else
8893 			latency_tolerant_lines = 2;
8894 	}
8895 
8896 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8897 
8898 	if (dce8_latency_watermark(wm) <= latency_hiding)
8899 		return true;
8900 	else
8901 		return false;
8902 }
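
/*
 * Continuing the hypothetical 1080p example: lb_size = 5120 and
 * src_width = 1920 give 2 lb partitions; with vtaps = 1 that is
 * <= vtaps + 1, so one latency tolerant line and
 * latency_hiding = 1 * 14815 + 1893 ~= 16708 ns, well above the
 * ~3686 ns latency watermark computed above, so the mode fits.
 */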
8903 
8904 /**
8905  * dce8_program_watermarks - program display watermarks
8906  *
8907  * @rdev: radeon_device pointer
8908  * @radeon_crtc: the selected display controller
8909  * @lb_size: line buffer size
8910  * @num_heads: number of display controllers in use
8911  *
8912  * Calculate and program the display watermarks for the
8913  * selected display controller (CIK).
8914  */
8915 static void dce8_program_watermarks(struct radeon_device *rdev,
8916 				    struct radeon_crtc *radeon_crtc,
8917 				    u32 lb_size, u32 num_heads)
8918 {
8919 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8920 	struct dce8_wm_params wm_low, wm_high;
8921 	u32 pixel_period;
8922 	u32 line_time = 0;
8923 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8924 	u32 tmp, wm_mask;
8925 
8926 	if (radeon_crtc->base.enabled && num_heads && mode) {
8927 		pixel_period = 1000000 / (u32)mode->clock;
8928 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8929 
8930 		/* watermark for high clocks */
8931 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8932 		    rdev->pm.dpm_enabled) {
8933 			wm_high.yclk =
8934 				radeon_dpm_get_mclk(rdev, false) * 10;
8935 			wm_high.sclk =
8936 				radeon_dpm_get_sclk(rdev, false) * 10;
8937 		} else {
8938 			wm_high.yclk = rdev->pm.current_mclk * 10;
8939 			wm_high.sclk = rdev->pm.current_sclk * 10;
8940 		}
8941 
8942 		wm_high.disp_clk = mode->clock;
8943 		wm_high.src_width = mode->crtc_hdisplay;
8944 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8945 		wm_high.blank_time = line_time - wm_high.active_time;
8946 		wm_high.interlaced = false;
8947 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8948 			wm_high.interlaced = true;
8949 		wm_high.vsc = radeon_crtc->vsc;
8950 		wm_high.vtaps = 1;
8951 		if (radeon_crtc->rmx_type != RMX_OFF)
8952 			wm_high.vtaps = 2;
8953 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8954 		wm_high.lb_size = lb_size;
8955 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8956 		wm_high.num_heads = num_heads;
8957 
8958 		/* set for high clocks */
8959 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8960 
8961 		/* possibly force display priority to high */
8962 		/* should really do this at mode validation time... */
8963 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8964 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8965 		    !dce8_check_latency_hiding(&wm_high) ||
8966 		    (rdev->disp_priority == 2)) {
8967 			DRM_DEBUG_KMS("force priority to high\n");
8968 		}
8969 
8970 		/* watermark for low clocks */
8971 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8972 		    rdev->pm.dpm_enabled) {
8973 			wm_low.yclk =
8974 				radeon_dpm_get_mclk(rdev, true) * 10;
8975 			wm_low.sclk =
8976 				radeon_dpm_get_sclk(rdev, true) * 10;
8977 		} else {
8978 			wm_low.yclk = rdev->pm.current_mclk * 10;
8979 			wm_low.sclk = rdev->pm.current_sclk * 10;
8980 		}
8981 
8982 		wm_low.disp_clk = mode->clock;
8983 		wm_low.src_width = mode->crtc_hdisplay;
8984 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8985 		wm_low.blank_time = line_time - wm_low.active_time;
8986 		wm_low.interlaced = false;
8987 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8988 			wm_low.interlaced = true;
8989 		wm_low.vsc = radeon_crtc->vsc;
8990 		wm_low.vtaps = 1;
8991 		if (radeon_crtc->rmx_type != RMX_OFF)
8992 			wm_low.vtaps = 2;
8993 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8994 		wm_low.lb_size = lb_size;
8995 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8996 		wm_low.num_heads = num_heads;
8997 
8998 		/* set for low clocks */
8999 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9000 
9001 		/* possibly force display priority to high */
9002 		/* should really do this at mode validation time... */
9003 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9004 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9005 		    !dce8_check_latency_hiding(&wm_low) ||
9006 		    (rdev->disp_priority == 2)) {
9007 			DRM_DEBUG_KMS("force priority to high\n");
9008 		}
9009 	}
9010 
9011 	/* select wm A */
9012 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9013 	tmp = wm_mask;
9014 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9015 	tmp |= LATENCY_WATERMARK_MASK(1);
9016 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9017 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9018 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9019 		LATENCY_HIGH_WATERMARK(line_time)));
9020 	/* select wm B */
9021 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9022 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9023 	tmp |= LATENCY_WATERMARK_MASK(2);
9024 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9025 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9026 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9027 		LATENCY_HIGH_WATERMARK(line_time)));
9028 	/* restore original selection */
9029 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9030 
9031 	/* save values for DPM */
9032 	radeon_crtc->line_time = line_time;
9033 	radeon_crtc->wm_high = latency_watermark_a;
9034 	radeon_crtc->wm_low = latency_watermark_b;
9035 }
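
/*
 * Note on the sequence above: watermark set A carries the high-clock
 * value and set B the low-clock value; line_time, wm_high and wm_low
 * are saved so the DPM code can re-check these limits when switching
 * between performance levels.
 */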
9036 
9037 /**
9038  * dce8_bandwidth_update - program display watermarks
9039  *
9040  * @rdev: radeon_device pointer
9041  *
9042  * Calculate and program the display watermarks and line
9043  * buffer allocation (CIK).
9044  */
9045 void dce8_bandwidth_update(struct radeon_device *rdev)
9046 {
9047 	struct drm_display_mode *mode = NULL;
9048 	u32 num_heads = 0, lb_size;
9049 	int i;
9050 
9051 	radeon_update_display_priority(rdev);
9052 
9053 	for (i = 0; i < rdev->num_crtc; i++) {
9054 		if (rdev->mode_info.crtcs[i]->base.enabled)
9055 			num_heads++;
9056 	}
9057 	for (i = 0; i < rdev->num_crtc; i++) {
9058 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9059 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9060 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9061 	}
9062 }
9063 
9064 /**
9065  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9066  *
9067  * @rdev: radeon_device pointer
9068  *
9069  * Fetches a GPU clock counter snapshot (CIK).
9070  * Returns the 64 bit clock counter snapshot.
9071  */
9072 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9073 {
9074 	uint64_t clock;
9075 
9076 	mutex_lock(&rdev->gpu_clock_mutex);
9077 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9078 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9079 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9080 	mutex_unlock(&rdev->gpu_clock_mutex);
9081 	return clock;
9082 }
9083 
9084 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9085                               u32 cntl_reg, u32 status_reg)
9086 {
9087 	int r, i;
9088 	struct atom_clock_dividers dividers;
9089 	uint32_t tmp;
9090 
9091 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9092 					   clock, false, &dividers);
9093 	if (r)
9094 		return r;
9095 
9096 	tmp = RREG32_SMC(cntl_reg);
9097 	tmp &= ~(DCLK_DIR_CNTL_EN | DCLK_DIVIDER_MASK);
9098 	tmp |= dividers.post_divider;
9099 	WREG32_SMC(cntl_reg, tmp);
9100 
9101 	for (i = 0; i < 100; i++) {
9102 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9103 			break;
9104 		mdelay(10);
9105 	}
9106 	if (i == 100)
9107 		return -ETIMEDOUT;
9108 
9109 	return 0;
9110 }
9111 
9112 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9113 {
9114 	int r = 0;
9115 
9116 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9117 	if (r)
9118 		return r;
9119 
9120 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9121 	return r;
9122 }
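
/*
 * Usage sketch (hypothetical rates, in the driver's 10 kHz clock
 * units): a caller bringing up UVD might request
 * cik_set_uvd_clocks(rdev, 53300, 40000) for a 533 MHz VCLK and
 * 400 MHz DCLK; the clocks are set independently and the first
 * failure aborts the sequence.
 */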
9123 
9124 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9125 {
9126 	int r, i;
9127 	struct atom_clock_dividers dividers;
9128 	u32 tmp;
9129 
9130 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9131 					   ecclk, false, &dividers);
9132 	if (r)
9133 		return r;
9134 
9135 	for (i = 0; i < 100; i++) {
9136 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9137 			break;
9138 		mdelay(10);
9139 	}
9140 	if (i == 100)
9141 		return -ETIMEDOUT;
9142 
9143 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9144 	tmp &= ~(ECLK_DIR_CNTL_EN | ECLK_DIVIDER_MASK);
9145 	tmp |= dividers.post_divider;
9146 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9147 
9148 	for (i = 0; i < 100; i++) {
9149 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9150 			break;
9151 		mdelay(10);
9152 	}
9153 	if (i == 100)
9154 		return -ETIMEDOUT;
9155 
9156 	return 0;
9157 }
9158 
9159 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9160 {
9161 	struct pci_dev *root = rdev->pdev->bus->self;
9162 	int bridge_pos, gpu_pos;
9163 	u32 speed_cntl, mask, current_data_rate;
9164 	int ret, i;
9165 	u16 tmp16;
9166 
9167 	if (radeon_pcie_gen2 == 0)
9168 		return;
9169 
9170 	if (rdev->flags & RADEON_IS_IGP)
9171 		return;
9172 
9173 	if (!(rdev->flags & RADEON_IS_PCIE))
9174 		return;
9175 
9176 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9177 	if (ret != 0)
9178 		return;
9179 
9180 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9181 		return;
9182 
9183 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9184 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9185 		LC_CURRENT_DATA_RATE_SHIFT;
9186 	if (mask & DRM_PCIE_SPEED_80) {
9187 		if (current_data_rate == 2) {
9188 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9189 			return;
9190 		}
9191 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9192 	} else if (mask & DRM_PCIE_SPEED_50) {
9193 		if (current_data_rate == 1) {
9194 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9195 			return;
9196 		}
9197 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9198 	}
9199 
9200 	bridge_pos = pci_pcie_cap(root);
9201 	if (!bridge_pos)
9202 		return;
9203 
9204 	gpu_pos = pci_pcie_cap(rdev->pdev);
9205 	if (!gpu_pos)
9206 		return;
9207 
9208 	if (mask & DRM_PCIE_SPEED_80) {
9209 		/* re-try equalization if gen3 is not already enabled */
9210 		if (current_data_rate != 2) {
9211 			u16 bridge_cfg, gpu_cfg;
9212 			u16 bridge_cfg2, gpu_cfg2;
9213 			u32 max_lw, current_lw, tmp;
9214 
9215 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9216 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9217 
9218 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9219 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9220 
9221 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9222 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9223 
9224 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9225 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9226 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9227 
9228 			if (current_lw < max_lw) {
9229 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9230 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9231 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9232 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9233 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9234 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9235 				}
9236 			}
9237 
9238 			for (i = 0; i < 10; i++) {
9239 				/* check status */
9240 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9241 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9242 					break;
9243 
9244 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9245 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9246 
9247 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9248 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9249 
9250 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9251 				tmp |= LC_SET_QUIESCE;
9252 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9253 
9254 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9255 				tmp |= LC_REDO_EQ;
9256 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9257 
9258 				mdelay(100);
9259 
9260 				/* linkctl */
9261 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9262 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9263 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9264 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9265 
9266 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9267 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9268 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9269 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9270 
9271 				/* linkctl2 */
9272 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9273 				tmp16 &= ~((1 << 4) | (7 << 9));
9274 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9275 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9276 
9277 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9278 				tmp16 &= ~((1 << 4) | (7 << 9));
9279 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9280 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9281 
9282 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9283 				tmp &= ~LC_SET_QUIESCE;
9284 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9285 			}
9286 		}
9287 	}
9288 
9289 	/* set the link speed */
9290 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9291 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9292 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9293 
9294 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9295 	tmp16 &= ~0xf;
9296 	if (mask & DRM_PCIE_SPEED_80)
9297 		tmp16 |= 3; /* gen3 */
9298 	else if (mask & DRM_PCIE_SPEED_50)
9299 		tmp16 |= 2; /* gen2 */
9300 	else
9301 		tmp16 |= 1; /* gen1 */
9302 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9303 
9304 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9305 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9306 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9307 
9308 	for (i = 0; i < rdev->usec_timeout; i++) {
9309 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9310 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9311 			break;
9312 		udelay(1);
9313 	}
9314 }
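
/*
 * Summary of the sequence above: on a gen3-capable link the driver
 * snapshots bridge and GPU link control state, runs up to ten
 * quiesce/redo-equalization cycles while restoring that state, then
 * writes the target speed (gen1/2/3) into PCI_EXP_LNKCTL2, sets
 * LC_INITIATE_LINK_SPEED_CHANGE and polls until the hardware clears
 * it.
 */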
9315 
9316 static void cik_program_aspm(struct radeon_device *rdev)
9317 {
9318 	u32 data, orig;
9319 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9320 	bool disable_clkreq = false;
9321 
9322 	if (radeon_aspm == 0)
9323 		return;
9324 
9325 	/* XXX double check IGPs */
9326 	if (rdev->flags & RADEON_IS_IGP)
9327 		return;
9328 
9329 	if (!(rdev->flags & RADEON_IS_PCIE))
9330 		return;
9331 
9332 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9333 	data &= ~LC_XMIT_N_FTS_MASK;
9334 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9335 	if (orig != data)
9336 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9337 
9338 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9339 	data |= LC_GO_TO_RECOVERY;
9340 	if (orig != data)
9341 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9342 
9343 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9344 	data |= P_IGNORE_EDB_ERR;
9345 	if (orig != data)
9346 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9347 
9348 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9349 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9350 	data |= LC_PMI_TO_L1_DIS;
9351 	if (!disable_l0s)
9352 		data |= LC_L0S_INACTIVITY(7);
9353 
9354 	if (!disable_l1) {
9355 		data |= LC_L1_INACTIVITY(7);
9356 		data &= ~LC_PMI_TO_L1_DIS;
9357 		if (orig != data)
9358 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9359 
9360 		if (!disable_plloff_in_l1) {
9361 			bool clk_req_support;
9362 
9363 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9364 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9365 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9366 			if (orig != data)
9367 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9368 
9369 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9370 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9371 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9372 			if (orig != data)
9373 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9374 
9375 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9376 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9377 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9378 			if (orig != data)
9379 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9380 
9381 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9382 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9383 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9384 			if (orig != data)
9385 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9386 
9387 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9388 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9389 			data |= LC_DYN_LANES_PWR_STATE(3);
9390 			if (orig != data)
9391 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9392 
9393 			if (!disable_clkreq) {
9394 				struct pci_dev *root = rdev->pdev->bus->self;
9395 				u32 lnkcap;
9396 
9397 				clk_req_support = false;
9398 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9399 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9400 					clk_req_support = true;
9401 			} else {
9402 				clk_req_support = false;
9403 			}
9404 
9405 			if (clk_req_support) {
9406 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9407 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9408 				if (orig != data)
9409 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9410 
9411 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9412 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9413 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9414 				if (orig != data)
9415 					WREG32_SMC(THM_CLK_CNTL, data);
9416 
9417 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9418 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9419 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9420 				if (orig != data)
9421 					WREG32_SMC(MISC_CLK_CTRL, data);
9422 
9423 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9424 				data &= ~BCLK_AS_XCLK;
9425 				if (orig != data)
9426 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9427 
9428 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9429 				data &= ~FORCE_BIF_REFCLK_EN;
9430 				if (orig != data)
9431 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9432 
9433 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9434 				data &= ~MPLL_CLKOUT_SEL_MASK;
9435 				data |= MPLL_CLKOUT_SEL(4);
9436 				if (orig != data)
9437 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9438 			}
9439 		}
9440 	} else {
9441 		if (orig != data)
9442 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9443 	}
9444 
9445 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9446 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9447 	if (orig != data)
9448 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9449 
9450 	if (!disable_l0s) {
9451 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9452 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9453 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9454 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9455 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9456 				data &= ~LC_L0S_INACTIVITY_MASK;
9457 				if (orig != data)
9458 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9459 			}
9460 		}
9461 	}
9462 }
9463