/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
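/* Usage sketch (illustrative, not part of this file): the millidegree
 * value returned above is what the hwmon/sysfs layer expects for
 * temp*_input.  The radeon hwmon hook consumes it roughly like this
 * (exact helper names are assumptions here; only the millidegree
 * contract matters):
 *
 *	int temp = radeon_get_temperature(rdev);
 *	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 */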

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
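/* Example (illustrative only): the accessors above implement the usual
 * index/data pair, with a dummy read of PCIE_INDEX to post the index
 * write before PCIE_DATA is touched.  A read-modify-write of an indirect
 * PCIE register therefore looks like the sketch below; MASK and VALUE
 * are placeholders, and the spinlock keeps the index/data sequence
 * atomic against concurrent accessors:
 *
 *	u32 tmp = cik_pciep_rreg(rdev, reg);
 *	tmp &= ~MASK;
 *	tmp |= VALUE;
 *	cik_pciep_wreg(rdev, reg, tmp);
 */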
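/* The RLC save/restore lists below are copied verbatim into the RLC
 * save/restore buffer (see sumo_rlc_init()) and interpreted by the RLC
 * microcode, not by the driver.  From the layout, each entry appears to
 * encode an instance/broadcast select in the upper 16 bits and a dword
 * register offset (byte offset >> 2) in the lower 16 bits, followed by a
 * placeholder dword for the saved value; the bare counts (0x3, 0x5)
 * appear to introduce runs of consecutive registers.  This reading of
 * the encoding is informed guesswork, not documented here.
 */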
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}
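/* Each golden-register table above is a flat list of
 * {offset, and_mask, or_mask} triplets.  radeon_program_register_sequence()
 * (in radeon_device.c, which is authoritative) applies them with
 * read-modify-write semantics, roughly:
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		if (and_mask == 0xffffffff) {
 *			tmp = or_mask;
 *		} else {
 *			tmp = RREG32(reg);
 *			tmp &= ~and_mask;
 *			tmp |= (or_mask & and_mask);
 *		}
 *		WREG32(reg, tmp);
 *	}
 */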

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
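/* Typical use (sketch): compute rings commit their write pointer through
 * the doorbell aperture rather than a ring MMIO register, along the
 * lines of cik_compute_ring_set_wptr():
 *
 *	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
 *	WDOORBELL32(ring->doorbell_index, ring->wptr);
 *
 * where the WDOORBELL32() wrapper resolves to cik_mm_wdoorbell() on CIK
 * parts.
 */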
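/* The tables below are {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs that ci_mc_load_microcode() programs into the memory controller
 * before streaming in the MC ucode proper.
 */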
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};


/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
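/* Callers are expected to serialize the selection and restore the
 * default instance when done; the pattern used elsewhere in this driver
 * looks roughly like:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... access the instanced registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */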
1751 
1752 /* ucode loading */
1753 /**
1754  * ci_mc_load_microcode - load MC ucode into the hw
1755  *
1756  * @rdev: radeon_device pointer
1757  *
1758  * Load the GDDR MC ucode into the hw (CIK).
1759  * Returns 0 on success, error on failure.
1760  */
1761 int ci_mc_load_microcode(struct radeon_device *rdev)
1762 {
1763 	const __be32 *fw_data;
1764 	u32 running, blackout = 0;
1765 	u32 *io_mc_regs;
1766 	int i, regs_size, ucode_size;
1767 
1768 	if (!rdev->mc_fw)
1769 		return -EINVAL;
1770 
1771 	ucode_size = rdev->mc_fw->size / 4;
1772 
1773 	switch (rdev->family) {
1774 	case CHIP_BONAIRE:
1775 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1776 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1777 		break;
1778 	case CHIP_HAWAII:
1779 		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1780 		regs_size = HAWAII_IO_MC_REGS_SIZE;
1781 		break;
1782 	default:
1783 		return -EINVAL;
1784 	}
1785 
1786 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1787 
1788 	if (running == 0) {
1794 		/* reset the engine and set to writable */
1795 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1796 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1797 
1798 		/* load mc io regs */
1799 		for (i = 0; i < regs_size; i++) {
1800 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1801 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1802 		}
1803 		/* load the MC ucode */
1804 		fw_data = (const __be32 *)rdev->mc_fw->data;
1805 		for (i = 0; i < ucode_size; i++)
1806 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1807 
1808 		/* put the engine back into the active state */
1809 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1810 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1811 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1812 
1813 		/* wait for training to complete */
1814 		for (i = 0; i < rdev->usec_timeout; i++) {
1815 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1816 				break;
1817 			udelay(1);
1818 		}
1819 		for (i = 0; i < rdev->usec_timeout; i++) {
1820 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1821 				break;
1822 			udelay(1);
1823 		}
1827 	}
1828 
1829 	return 0;
1830 }
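
/*
 * Hedged call-site sketch: dGPU init paths are expected to load the MC
 * ucode before enabling the memory controller, skipping APUs (which carry
 * no discrete-MC firmware), along these lines:
 *
 *	if (!(rdev->flags & RADEON_IS_IGP)) {
 *		r = ci_mc_load_microcode(rdev);
 *		if (r) {
 *			DRM_ERROR("Failed to load MC firmware!\n");
 *			return r;
 *		}
 *	}
 */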
1831 
1832 /**
1833  * cik_init_microcode - load ucode images from disk
1834  *
1835  * @rdev: radeon_device pointer
1836  *
1837  * Use the firmware interface to load the ucode images into
1838  * the driver (not loaded into hw).
1839  * Returns 0 on success, error on failure.
1840  */
1841 static int cik_init_microcode(struct radeon_device *rdev)
1842 {
1843 	const char *chip_name;
1844 	size_t pfp_req_size, me_req_size, ce_req_size,
1845 		mec_req_size, rlc_req_size, mc_req_size = 0,
1846 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1847 	char fw_name[30];
1848 	int err;
1849 
1850 	DRM_DEBUG("\n");
1851 
1852 	switch (rdev->family) {
1853 	case CHIP_BONAIRE:
1854 		chip_name = "BONAIRE";
1855 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1856 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1857 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1858 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1859 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1860 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1861 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1862 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1863 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1864 		break;
1865 	case CHIP_HAWAII:
1866 		chip_name = "HAWAII";
1867 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1868 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1869 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1870 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1871 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1872 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1873 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1874 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1875 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1876 		break;
1877 	case CHIP_KAVERI:
1878 		chip_name = "KAVERI";
1879 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1880 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1881 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1882 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1883 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1884 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1885 		break;
1886 	case CHIP_KABINI:
1887 		chip_name = "KABINI";
1888 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1889 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1890 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1891 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1892 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1893 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1894 		break;
1895 	case CHIP_MULLINS:
1896 		chip_name = "MULLINS";
1897 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1898 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1899 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1900 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1901 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1902 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1903 		break;
1904 	default: BUG();
1905 	}
1906 
1907 	DRM_INFO("Loading %s Microcode\n", chip_name);
1908 
1909 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1910 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1911 	if (err)
1912 		goto out;
1913 	if (rdev->pfp_fw->size != pfp_req_size) {
1914 		printk(KERN_ERR
1915 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1916 		       rdev->pfp_fw->size, fw_name);
1917 		err = -EINVAL;
1918 		goto out;
1919 	}
1920 
1921 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1922 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1923 	if (err)
1924 		goto out;
1925 	if (rdev->me_fw->size != me_req_size) {
1926 		printk(KERN_ERR
1927 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1928 		       rdev->me_fw->size, fw_name);
1929 		err = -EINVAL;
		goto out;
1930 	}
1931 
1932 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1933 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1934 	if (err)
1935 		goto out;
1936 	if (rdev->ce_fw->size != ce_req_size) {
1937 		printk(KERN_ERR
1938 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1939 		       rdev->ce_fw->size, fw_name);
1940 		err = -EINVAL;
		goto out;
1941 	}
1942 
1943 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1944 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1945 	if (err)
1946 		goto out;
1947 	if (rdev->mec_fw->size != mec_req_size) {
1948 		printk(KERN_ERR
1949 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1950 		       rdev->mec_fw->size, fw_name);
1951 		err = -EINVAL;
		goto out;
1952 	}
1953 
1954 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1955 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1956 	if (err)
1957 		goto out;
1958 	if (rdev->rlc_fw->size != rlc_req_size) {
1959 		printk(KERN_ERR
1960 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1961 		       rdev->rlc_fw->size, fw_name);
1962 		err = -EINVAL;
		goto out;
1963 	}
1964 
1965 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1966 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1967 	if (err)
1968 		goto out;
1969 	if (rdev->sdma_fw->size != sdma_req_size) {
1970 		printk(KERN_ERR
1971 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1972 		       rdev->sdma_fw->size, fw_name);
1973 		err = -EINVAL;
		goto out;
1974 	}
1975 
1976 	/* No SMC, MC ucode on APUs */
1977 	if (!(rdev->flags & RADEON_IS_IGP)) {
1978 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1979 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1980 		if (err) {
1981 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1982 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1983 			if (err)
1984 				goto out;
1985 		}
1986 		if ((rdev->mc_fw->size != mc_req_size) &&
1987 		    (rdev->mc_fw->size != mc2_req_size)) {
1988 			printk(KERN_ERR
1989 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1990 			       rdev->mc_fw->size, fw_name);
1991 			err = -EINVAL;
			goto out;
1992 		}
1993 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1994 
1995 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1996 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1997 		if (err) {
1998 			printk(KERN_ERR
1999 			       "smc: error loading firmware \"%s\"\n",
2000 			       fw_name);
2001 			release_firmware(rdev->smc_fw);
2002 			rdev->smc_fw = NULL;
2003 			err = 0;
2004 		} else if (rdev->smc_fw->size != smc_req_size) {
2005 			printk(KERN_ERR
2006 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2007 			       rdev->smc_fw->size, fw_name);
2008 			err = -EINVAL;
2009 		}
2010 	}
2011 
2012 out:
2013 	if (err) {
2014 		if (err != -EINVAL)
2015 			printk(KERN_ERR
2016 			       "cik_cp: Failed to load firmware \"%s\"\n",
2017 			       fw_name);
2018 		release_firmware(rdev->pfp_fw);
2019 		rdev->pfp_fw = NULL;
2020 		release_firmware(rdev->me_fw);
2021 		rdev->me_fw = NULL;
2022 		release_firmware(rdev->ce_fw);
2023 		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
2024 		release_firmware(rdev->rlc_fw);
2025 		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
2026 		release_firmware(rdev->mc_fw);
2027 		rdev->mc_fw = NULL;
2028 		release_firmware(rdev->smc_fw);
2029 		rdev->smc_fw = NULL;
2030 	}
2031 	return err;
2032 }
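
/*
 * Note: cik_init_microcode() only pulls the images into the rdev->*_fw
 * blobs via request_firmware(); nothing is written to the hw here.  The
 * actual uploads happen later in the per-engine load helpers
 * (ci_mc_load_microcode() above and the CP/RLC/SDMA equivalents elsewhere
 * in the driver).
 */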
2033 
2034 /*
2035  * Core functions
2036  */
2037 /**
2038  * cik_tiling_mode_table_init - init the hw tiling table
2039  *
2040  * @rdev: radeon_device pointer
2041  *
2042  * Starting with SI, the tiling setup is done globally in a
2043  * set of 32 tiling modes.  Rather than selecting each set of
2044  * parameters per surface as on older asics, we just select
2045  * which index in the tiling table we want to use, and the
2046  * surface uses those parameters (CIK).
2047  */
2048 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2049 {
2050 	const u32 num_tile_mode_states = 32;
2051 	const u32 num_secondary_tile_mode_states = 16;
2052 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2053 	u32 num_pipe_configs;
2054 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2055 		rdev->config.cik.max_shader_engines;
2056 
2057 	switch (rdev->config.cik.mem_row_size_in_kb) {
2058 	case 1:
2059 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2060 		break;
2061 	case 2:
2062 	default:
2063 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2064 		break;
2065 	case 4:
2066 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2067 		break;
2068 	}
2069 
2070 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2071 	if (num_pipe_configs > 8)
2072 		num_pipe_configs = 16;
2073 
2074 	if (num_pipe_configs == 16) {
2075 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2076 			switch (reg_offset) {
2077 			case 0:
2078 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2079 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2080 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2081 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2082 				break;
2083 			case 1:
2084 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2085 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2086 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2087 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2088 				break;
2089 			case 2:
2090 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2092 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2093 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2094 				break;
2095 			case 3:
2096 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2097 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2098 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2099 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2100 				break;
2101 			case 4:
2102 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2103 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2104 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2105 						 TILE_SPLIT(split_equal_to_row_size));
2106 				break;
2107 			case 5:
2108 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2109 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2110 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2111 				break;
2112 			case 6:
2113 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2114 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2115 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2116 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2117 				break;
2118 			case 7:
2119 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2120 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2121 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2122 						 TILE_SPLIT(split_equal_to_row_size));
2123 				break;
2124 			case 8:
2125 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2126 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2127 				break;
2128 			case 9:
2129 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2130 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2131 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2132 				break;
2133 			case 10:
2134 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2136 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2137 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138 				break;
2139 			case 11:
2140 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2141 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2142 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2143 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2144 				break;
2145 			case 12:
2146 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2147 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2148 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2149 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2150 				break;
2151 			case 13:
2152 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2153 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2154 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2155 				break;
2156 			case 14:
2157 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2158 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2159 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2160 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2161 				break;
2162 			case 16:
2163 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2164 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2165 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2166 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167 				break;
2168 			case 17:
2169 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2170 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2172 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2173 				break;
2174 			case 27:
2175 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2176 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2177 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2178 				break;
2179 			case 28:
2180 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2182 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2183 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184 				break;
2185 			case 29:
2186 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2187 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2188 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2189 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190 				break;
2191 			case 30:
2192 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2193 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2194 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2195 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196 				break;
2197 			default:
2198 				gb_tile_moden = 0;
2199 				break;
2200 			}
2201 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2202 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2203 		}
2204 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2205 			switch (reg_offset) {
2206 			case 0:
2207 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2208 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2209 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2210 						 NUM_BANKS(ADDR_SURF_16_BANK));
2211 				break;
2212 			case 1:
2213 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2214 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2215 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2216 						 NUM_BANKS(ADDR_SURF_16_BANK));
2217 				break;
2218 			case 2:
2219 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2220 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2221 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2222 						 NUM_BANKS(ADDR_SURF_16_BANK));
2223 				break;
2224 			case 3:
2225 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2226 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2227 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2228 						 NUM_BANKS(ADDR_SURF_16_BANK));
2229 				break;
2230 			case 4:
2231 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2233 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2234 						 NUM_BANKS(ADDR_SURF_8_BANK));
2235 				break;
2236 			case 5:
2237 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2239 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2240 						 NUM_BANKS(ADDR_SURF_4_BANK));
2241 				break;
2242 			case 6:
2243 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2246 						 NUM_BANKS(ADDR_SURF_2_BANK));
2247 				break;
2248 			case 8:
2249 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2252 						 NUM_BANKS(ADDR_SURF_16_BANK));
2253 				break;
2254 			case 9:
2255 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2257 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 						 NUM_BANKS(ADDR_SURF_16_BANK));
2259 				break;
2260 			case 10:
2261 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2264 						 NUM_BANKS(ADDR_SURF_16_BANK));
2265 				break;
2266 			case 11:
2267 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2270 						 NUM_BANKS(ADDR_SURF_8_BANK));
2271 				break;
2272 			case 12:
2273 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2276 						 NUM_BANKS(ADDR_SURF_4_BANK));
2277 				break;
2278 			case 13:
2279 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2282 						 NUM_BANKS(ADDR_SURF_2_BANK));
2283 				break;
2284 			case 14:
2285 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2287 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2288 						 NUM_BANKS(ADDR_SURF_2_BANK));
2289 				break;
2290 			default:
2291 				gb_tile_moden = 0;
2292 				break;
2293 			}
2294 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2295 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2296 		}
2297 	} else if (num_pipe_configs == 8) {
2298 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2299 			switch (reg_offset) {
2300 			case 0:
2301 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2303 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2304 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2305 				break;
2306 			case 1:
2307 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2309 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2310 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2311 				break;
2312 			case 2:
2313 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2315 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2316 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2317 				break;
2318 			case 3:
2319 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2321 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2322 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2323 				break;
2324 			case 4:
2325 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2326 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2327 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2328 						 TILE_SPLIT(split_equal_to_row_size));
2329 				break;
2330 			case 5:
2331 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2332 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2333 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334 				break;
2335 			case 6:
2336 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2337 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2339 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2340 				break;
2341 			case 7:
2342 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2343 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2345 						 TILE_SPLIT(split_equal_to_row_size));
2346 				break;
2347 			case 8:
2348 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2349 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2350 				break;
2351 			case 9:
2352 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2353 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2354 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2355 				break;
2356 			case 10:
2357 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2359 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2360 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361 				break;
2362 			case 11:
2363 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2364 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2366 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367 				break;
2368 			case 12:
2369 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2370 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2372 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373 				break;
2374 			case 13:
2375 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2376 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2377 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2378 				break;
2379 			case 14:
2380 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2382 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2383 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2384 				break;
2385 			case 16:
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2387 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2389 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2390 				break;
2391 			case 17:
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2393 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2395 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2396 				break;
2397 			case 27:
2398 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2399 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2400 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2401 				break;
2402 			case 28:
2403 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2405 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2406 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2407 				break;
2408 			case 29:
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2410 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2412 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2413 				break;
2414 			case 30:
2415 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2416 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2417 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2418 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2419 				break;
2420 			default:
2421 				gb_tile_moden = 0;
2422 				break;
2423 			}
2424 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2425 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2426 		}
2427 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2428 			switch (reg_offset) {
2429 			case 0:
2430 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2432 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2433 						 NUM_BANKS(ADDR_SURF_16_BANK));
2434 				break;
2435 			case 1:
2436 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2438 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439 						 NUM_BANKS(ADDR_SURF_16_BANK));
2440 				break;
2441 			case 2:
2442 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2444 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445 						 NUM_BANKS(ADDR_SURF_16_BANK));
2446 				break;
2447 			case 3:
2448 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451 						 NUM_BANKS(ADDR_SURF_16_BANK));
2452 				break;
2453 			case 4:
2454 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457 						 NUM_BANKS(ADDR_SURF_8_BANK));
2458 				break;
2459 			case 5:
2460 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463 						 NUM_BANKS(ADDR_SURF_4_BANK));
2464 				break;
2465 			case 6:
2466 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469 						 NUM_BANKS(ADDR_SURF_2_BANK));
2470 				break;
2471 			case 8:
2472 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2474 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2475 						 NUM_BANKS(ADDR_SURF_16_BANK));
2476 				break;
2477 			case 9:
2478 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2481 						 NUM_BANKS(ADDR_SURF_16_BANK));
2482 				break;
2483 			case 10:
2484 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2486 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487 						 NUM_BANKS(ADDR_SURF_16_BANK));
2488 				break;
2489 			case 11:
2490 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2493 						 NUM_BANKS(ADDR_SURF_16_BANK));
2494 				break;
2495 			case 12:
2496 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499 						 NUM_BANKS(ADDR_SURF_8_BANK));
2500 				break;
2501 			case 13:
2502 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505 						 NUM_BANKS(ADDR_SURF_4_BANK));
2506 				break;
2507 			case 14:
2508 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511 						 NUM_BANKS(ADDR_SURF_2_BANK));
2512 				break;
2513 			default:
2514 				gb_tile_moden = 0;
2515 				break;
2516 			}
2517 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2518 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2519 		}
2520 	} else if (num_pipe_configs == 4) {
2521 		if (num_rbs == 4) {
2522 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2523 				switch (reg_offset) {
2524 				case 0:
2525 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2528 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2529 					break;
2530 				case 1:
2531 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2533 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2535 					break;
2536 				case 2:
2537 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2539 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2540 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2541 					break;
2542 				case 3:
2543 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2545 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2546 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2547 					break;
2548 				case 4:
2549 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2552 							 TILE_SPLIT(split_equal_to_row_size));
2553 					break;
2554 				case 5:
2555 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2556 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2557 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2558 					break;
2559 				case 6:
2560 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2561 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2562 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2564 					break;
2565 				case 7:
2566 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2567 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2568 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2569 							 TILE_SPLIT(split_equal_to_row_size));
2570 					break;
2571 				case 8:
2572 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2573 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2574 					break;
2575 				case 9:
2576 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2578 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2579 					break;
2580 				case 10:
2581 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2584 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585 					break;
2586 				case 11:
2587 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2588 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2589 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2590 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591 					break;
2592 				case 12:
2593 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2594 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2595 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2596 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2597 					break;
2598 				case 13:
2599 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2600 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2601 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2602 					break;
2603 				case 14:
2604 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2606 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2607 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2608 					break;
2609 				case 16:
2610 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2611 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2612 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2613 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2614 					break;
2615 				case 17:
2616 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2617 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2618 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2619 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620 					break;
2621 				case 27:
2622 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2623 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2624 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2625 					break;
2626 				case 28:
2627 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2628 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2629 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631 					break;
2632 				case 29:
2633 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2635 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2636 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637 					break;
2638 				case 30:
2639 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2640 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2641 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643 					break;
2644 				default:
2645 					gb_tile_moden = 0;
2646 					break;
2647 				}
2648 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2649 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2650 			}
2651 		} else if (num_rbs < 4) {
2652 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2653 				switch (reg_offset) {
2654 				case 0:
2655 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2657 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2658 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2659 					break;
2660 				case 1:
2661 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2664 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2665 					break;
2666 				case 2:
2667 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2668 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2669 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2670 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2671 					break;
2672 				case 3:
2673 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2675 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2676 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2677 					break;
2678 				case 4:
2679 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2680 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2681 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2682 							 TILE_SPLIT(split_equal_to_row_size));
2683 					break;
2684 				case 5:
2685 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2686 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2688 					break;
2689 				case 6:
2690 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2691 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2692 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2693 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2694 					break;
2695 				case 7:
2696 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2697 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2698 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2699 							 TILE_SPLIT(split_equal_to_row_size));
2700 					break;
2701 				case 8:
2702 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2703 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2704 					break;
2705 				case 9:
2706 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2707 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2708 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2709 					break;
2710 				case 10:
2711 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2713 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 					break;
2716 				case 11:
2717 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2718 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2719 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2720 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2721 					break;
2722 				case 12:
2723 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2724 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2725 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 					break;
2728 				case 13:
2729 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2730 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2731 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2732 					break;
2733 				case 14:
2734 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2735 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2736 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2737 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738 					break;
2739 				case 16:
2740 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2741 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2744 					break;
2745 				case 17:
2746 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2747 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2748 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2749 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750 					break;
2751 				case 27:
2752 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2753 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2755 					break;
2756 				case 28:
2757 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2758 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2759 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2760 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2761 					break;
2762 				case 29:
2763 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2764 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2765 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2766 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2767 					break;
2768 				case 30:
2769 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2770 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2771 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2773 					break;
2774 				default:
2775 					gb_tile_moden = 0;
2776 					break;
2777 				}
2778 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2779 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2780 			}
2781 		}
2782 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2783 			switch (reg_offset) {
2784 			case 0:
2785 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2787 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2788 						 NUM_BANKS(ADDR_SURF_16_BANK));
2789 				break;
2790 			case 1:
2791 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2792 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2793 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2794 						 NUM_BANKS(ADDR_SURF_16_BANK));
2795 				break;
2796 			case 2:
2797 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2799 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2800 						 NUM_BANKS(ADDR_SURF_16_BANK));
2801 				break;
2802 			case 3:
2803 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2805 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2806 						 NUM_BANKS(ADDR_SURF_16_BANK));
2807 				break;
2808 			case 4:
2809 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2811 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2812 						 NUM_BANKS(ADDR_SURF_16_BANK));
2813 				break;
2814 			case 5:
2815 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2817 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2818 						 NUM_BANKS(ADDR_SURF_8_BANK));
2819 				break;
2820 			case 6:
2821 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2823 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2824 						 NUM_BANKS(ADDR_SURF_4_BANK));
2825 				break;
2826 			case 8:
2827 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2828 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2829 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830 						 NUM_BANKS(ADDR_SURF_16_BANK));
2831 				break;
2832 			case 9:
2833 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2834 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2835 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2836 						 NUM_BANKS(ADDR_SURF_16_BANK));
2837 				break;
2838 			case 10:
2839 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2841 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2842 						 NUM_BANKS(ADDR_SURF_16_BANK));
2843 				break;
2844 			case 11:
2845 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2846 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2847 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848 						 NUM_BANKS(ADDR_SURF_16_BANK));
2849 				break;
2850 			case 12:
2851 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2853 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2854 						 NUM_BANKS(ADDR_SURF_16_BANK));
2855 				break;
2856 			case 13:
2857 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860 						 NUM_BANKS(ADDR_SURF_8_BANK));
2861 				break;
2862 			case 14:
2863 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2864 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2865 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2866 						 NUM_BANKS(ADDR_SURF_4_BANK));
2867 				break;
2868 			default:
2869 				gb_tile_moden = 0;
2870 				break;
2871 			}
2872 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2873 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2874 		}
2875 	} else if (num_pipe_configs == 2) {
2876 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2877 			switch (reg_offset) {
2878 			case 0:
2879 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2881 						 PIPE_CONFIG(ADDR_SURF_P2) |
2882 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2883 				break;
2884 			case 1:
2885 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887 						 PIPE_CONFIG(ADDR_SURF_P2) |
2888 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889 				break;
2890 			case 2:
2891 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893 						 PIPE_CONFIG(ADDR_SURF_P2) |
2894 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895 				break;
2896 			case 3:
2897 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899 						 PIPE_CONFIG(ADDR_SURF_P2) |
2900 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2901 				break;
2902 			case 4:
2903 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2905 						 PIPE_CONFIG(ADDR_SURF_P2) |
2906 						 TILE_SPLIT(split_equal_to_row_size));
2907 				break;
2908 			case 5:
2909 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2910 						 PIPE_CONFIG(ADDR_SURF_P2) |
2911 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912 				break;
2913 			case 6:
2914 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2915 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2916 						 PIPE_CONFIG(ADDR_SURF_P2) |
2917 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2918 				break;
2919 			case 7:
2920 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2921 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2922 						 PIPE_CONFIG(ADDR_SURF_P2) |
2923 						 TILE_SPLIT(split_equal_to_row_size));
2924 				break;
2925 			case 8:
2926 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2927 						 PIPE_CONFIG(ADDR_SURF_P2));
2928 				break;
2929 			case 9:
2930 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2931 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932 						 PIPE_CONFIG(ADDR_SURF_P2));
2933 				break;
2934 			case 10:
2935 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2936 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937 						 PIPE_CONFIG(ADDR_SURF_P2) |
2938 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 				break;
2940 			case 11:
2941 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2942 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943 						 PIPE_CONFIG(ADDR_SURF_P2) |
2944 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 				break;
2946 			case 12:
2947 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949 						 PIPE_CONFIG(ADDR_SURF_P2) |
2950 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 				break;
2952 			case 13:
2953 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954 						 PIPE_CONFIG(ADDR_SURF_P2) |
2955 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2956 				break;
2957 			case 14:
2958 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2959 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960 						 PIPE_CONFIG(ADDR_SURF_P2) |
2961 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962 				break;
2963 			case 16:
2964 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2965 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966 						 PIPE_CONFIG(ADDR_SURF_P2) |
2967 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968 				break;
2969 			case 17:
2970 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972 						 PIPE_CONFIG(ADDR_SURF_P2) |
2973 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974 				break;
2975 			case 27:
2976 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2977 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2978 						 PIPE_CONFIG(ADDR_SURF_P2));
2979 				break;
2980 			case 28:
2981 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2982 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2983 						 PIPE_CONFIG(ADDR_SURF_P2) |
2984 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2985 				break;
2986 			case 29:
2987 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2988 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2989 						 PIPE_CONFIG(ADDR_SURF_P2) |
2990 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2991 				break;
2992 			case 30:
2993 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2994 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2995 						 PIPE_CONFIG(ADDR_SURF_P2) |
2996 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997 				break;
2998 			default:
2999 				gb_tile_moden = 0;
3000 				break;
3001 			}
3002 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3003 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3004 		}
3005 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3006 			switch (reg_offset) {
3007 			case 0:
3008 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3009 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3010 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011 						 NUM_BANKS(ADDR_SURF_16_BANK));
3012 				break;
3013 			case 1:
3014 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3015 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3016 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3017 						 NUM_BANKS(ADDR_SURF_16_BANK));
3018 				break;
3019 			case 2:
3020 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3022 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023 						 NUM_BANKS(ADDR_SURF_16_BANK));
3024 				break;
3025 			case 3:
3026 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3028 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3029 						 NUM_BANKS(ADDR_SURF_16_BANK));
3030 				break;
3031 			case 4:
3032 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3033 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3034 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3035 						 NUM_BANKS(ADDR_SURF_16_BANK));
3036 				break;
3037 			case 5:
3038 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3040 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3041 						 NUM_BANKS(ADDR_SURF_16_BANK));
3042 				break;
3043 			case 6:
3044 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047 						 NUM_BANKS(ADDR_SURF_8_BANK));
3048 				break;
3049 			case 8:
3050 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3051 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3052 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3053 						 NUM_BANKS(ADDR_SURF_16_BANK));
3054 				break;
3055 			case 9:
3056 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3057 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3058 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059 						 NUM_BANKS(ADDR_SURF_16_BANK));
3060 				break;
3061 			case 10:
3062 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3063 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3064 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065 						 NUM_BANKS(ADDR_SURF_16_BANK));
3066 				break;
3067 			case 11:
3068 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3069 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3070 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3071 						 NUM_BANKS(ADDR_SURF_16_BANK));
3072 				break;
3073 			case 12:
3074 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3076 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3077 						 NUM_BANKS(ADDR_SURF_16_BANK));
3078 				break;
3079 			case 13:
3080 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3083 						 NUM_BANKS(ADDR_SURF_16_BANK));
3084 				break;
3085 			case 14:
3086 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3089 						 NUM_BANKS(ADDR_SURF_8_BANK));
3090 				break;
3091 			default:
3092 				gb_tile_moden = 0;
3093 				break;
3094 			}
3095 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3096 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3097 		}
3098 	} else {
3099 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
	}
3100 }
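
/*
 * The tables cached in rdev->config.cik.tile_mode_array[] and
 * macrotile_mode_array[] above are also what userspace queries via the
 * RADEON_INFO ioctl, so mesa can reference a tiling-table index per
 * surface instead of encoding full tiling parameters itself.
 */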
3101 
3102 /**
3103  * cik_select_se_sh - select which SE, SH to address
3104  *
3105  * @rdev: radeon_device pointer
3106  * @se_num: shader engine to address
3107  * @sh_num: sh block to address
3108  *
3109  * Select which SE, SH combinations to address. Certain
3110  * registers are instanced per SE or SH.  0xffffffff means
3111  * broadcast to all SEs or SHs (CIK).
3112  */
3113 static void cik_select_se_sh(struct radeon_device *rdev,
3114 			     u32 se_num, u32 sh_num)
3115 {
3116 	u32 data = INSTANCE_BROADCAST_WRITES;
3117 
3118 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3119 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3120 	else if (se_num == 0xffffffff)
3121 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3122 	else if (sh_num == 0xffffffff)
3123 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3124 	else
3125 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3126 	WREG32(GRBM_GFX_INDEX, data);
3127 }
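
/*
 * Typical pattern (mirrored by cik_setup_rb() below): select one SE/SH
 * instance, access its instanced registers, then restore broadcast so
 * subsequent writes reach every instance:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */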
3128 
3129 /**
3130  * cik_create_bitmask - create a bitmask
3131  *
3132  * @bit_width: length of the mask
3133  *
3134  * Create a variable-length bit mask (CIK).
3135  * Returns the bitmask.
3136  */
3137 static u32 cik_create_bitmask(u32 bit_width)
3138 {
3139 	u32 i, mask = 0;
3140 
3141 	for (i = 0; i < bit_width; i++) {
3142 		mask <<= 1;
3143 		mask |= 1;
3144 	}
3145 	return mask;
3146 }
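
/*
 * Equivalent closed form, assuming bit_width <= 31 (which holds for the
 * per-SH RB counts passed in below): mask = (1U << bit_width) - 1;
 * e.g. bit_width = 4 yields 0xf.
 */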
3147 
3148 /**
3149  * cik_get_rb_disabled - computes the mask of disabled RBs
3150  *
3151  * @rdev: radeon_device pointer
3152  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3154  * @sh_per_se: number of SH blocks per SE for the asic
3155  *
3156  * Calculates the bitmask of disabled RBs (CIK).
3157  * Returns the disabled RB bitmask.
3158  */
3159 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3160 			      u32 max_rb_num_per_se,
3161 			      u32 sh_per_se)
3162 {
3163 	u32 data, mask;
3164 
3165 	data = RREG32(CC_RB_BACKEND_DISABLE);
3166 	if (data & 1)
3167 		data &= BACKEND_DISABLE_MASK;
3168 	else
3169 		data = 0;
3170 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3171 
3172 	data >>= BACKEND_DISABLE_SHIFT;
3173 
3174 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3175 
3176 	return data & mask;
3177 }
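
/*
 * Worked example with hypothetical values: max_rb_num_per_se = 2 and
 * sh_per_se = 1 give mask = 0x3; if the harvest fuses mark RB1 of the
 * selected SE/SH as disabled, (data & mask) returns 0x2.
 */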
3178 
3179 /**
3180  * cik_setup_rb - setup the RBs on the asic
3181  *
3182  * @rdev: radeon_device pointer
3183  * @se_num: number of SEs (shader engines) for the asic
3184  * @sh_per_se: number of SH blocks per SE for the asic
3185  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3186  *
3187  * Configures per-SE/SH RB registers (CIK).
3188  */
3189 static void cik_setup_rb(struct radeon_device *rdev,
3190 			 u32 se_num, u32 sh_per_se,
3191 			 u32 max_rb_num_per_se)
3192 {
3193 	int i, j;
3194 	u32 data, mask;
3195 	u32 disabled_rbs = 0;
3196 	u32 enabled_rbs = 0;
3197 
3198 	for (i = 0; i < se_num; i++) {
3199 		for (j = 0; j < sh_per_se; j++) {
3200 			cik_select_se_sh(rdev, i, j);
3201 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3202 			if (rdev->family == CHIP_HAWAII)
3203 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3204 			else
3205 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3206 		}
3207 	}
3208 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3209 
3210 	mask = 1;
3211 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3212 		if (!(disabled_rbs & mask))
3213 			enabled_rbs |= mask;
3214 		mask <<= 1;
3215 	}
3216 
3217 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3218 
3219 	for (i = 0; i < se_num; i++) {
3220 		cik_select_se_sh(rdev, i, 0xffffffff);
3221 		data = 0;
3222 		for (j = 0; j < sh_per_se; j++) {
3223 			switch (enabled_rbs & 3) {
3224 			case 0:
3225 				if (j == 0)
3226 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3227 				else
3228 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3229 				break;
3230 			case 1:
3231 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3232 				break;
3233 			case 2:
3234 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3235 				break;
3236 			case 3:
3237 			default:
3238 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3239 				break;
3240 			}
3241 			enabled_rbs >>= 2;
3242 		}
3243 		WREG32(PA_SC_RASTER_CONFIG, data);
3244 	}
3245 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3246 }
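
/*
 * Worked example (hypothetical fuse values, assuming the CIK bitmap width
 * of 2 bits per SH): with se_num = 2, sh_per_se = 1 and
 * max_rb_num_per_se = 2, an RB fused off in SE1 gives
 * disabled_rbs = 0b1000, so the loop above derives enabled_rbs = 0b0111,
 * which is stored in backend_enable_mask and folded into
 * PA_SC_RASTER_CONFIG per SE.
 */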
3247 
3248 /**
3249  * cik_gpu_init - setup the 3D engine
3250  *
3251  * @rdev: radeon_device pointer
3252  *
3253  * Configures the 3D engine and tiling configuration
3254  * registers so that the 3D engine is usable.
3255  */
3256 static void cik_gpu_init(struct radeon_device *rdev)
3257 {
3258 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3259 	u32 mc_shared_chmap, mc_arb_ramcfg;
3260 	u32 hdp_host_path_cntl;
3261 	u32 tmp;
	int i, j;
3263 
3264 	switch (rdev->family) {
3265 	case CHIP_BONAIRE:
3266 		rdev->config.cik.max_shader_engines = 2;
3267 		rdev->config.cik.max_tile_pipes = 4;
3268 		rdev->config.cik.max_cu_per_sh = 7;
3269 		rdev->config.cik.max_sh_per_se = 1;
3270 		rdev->config.cik.max_backends_per_se = 2;
3271 		rdev->config.cik.max_texture_channel_caches = 4;
3272 		rdev->config.cik.max_gprs = 256;
3273 		rdev->config.cik.max_gs_threads = 32;
3274 		rdev->config.cik.max_hw_contexts = 8;
3275 
3276 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3277 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3278 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3279 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3280 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3281 		break;
3282 	case CHIP_HAWAII:
3283 		rdev->config.cik.max_shader_engines = 4;
3284 		rdev->config.cik.max_tile_pipes = 16;
3285 		rdev->config.cik.max_cu_per_sh = 11;
3286 		rdev->config.cik.max_sh_per_se = 1;
3287 		rdev->config.cik.max_backends_per_se = 4;
3288 		rdev->config.cik.max_texture_channel_caches = 16;
3289 		rdev->config.cik.max_gprs = 256;
3290 		rdev->config.cik.max_gs_threads = 32;
3291 		rdev->config.cik.max_hw_contexts = 8;
3292 
3293 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3294 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3295 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3296 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3297 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3298 		break;
3299 	case CHIP_KAVERI:
3300 		rdev->config.cik.max_shader_engines = 1;
3301 		rdev->config.cik.max_tile_pipes = 4;
3302 		if ((rdev->pdev->device == 0x1304) ||
3303 		    (rdev->pdev->device == 0x1305) ||
3304 		    (rdev->pdev->device == 0x130C) ||
3305 		    (rdev->pdev->device == 0x130F) ||
3306 		    (rdev->pdev->device == 0x1310) ||
3307 		    (rdev->pdev->device == 0x1311) ||
3308 		    (rdev->pdev->device == 0x131C)) {
3309 			rdev->config.cik.max_cu_per_sh = 8;
3310 			rdev->config.cik.max_backends_per_se = 2;
3311 		} else if ((rdev->pdev->device == 0x1309) ||
3312 			   (rdev->pdev->device == 0x130A) ||
3313 			   (rdev->pdev->device == 0x130D) ||
3314 			   (rdev->pdev->device == 0x1313) ||
3315 			   (rdev->pdev->device == 0x131D)) {
3316 			rdev->config.cik.max_cu_per_sh = 6;
3317 			rdev->config.cik.max_backends_per_se = 2;
3318 		} else if ((rdev->pdev->device == 0x1306) ||
3319 			   (rdev->pdev->device == 0x1307) ||
3320 			   (rdev->pdev->device == 0x130B) ||
3321 			   (rdev->pdev->device == 0x130E) ||
3322 			   (rdev->pdev->device == 0x1315) ||
3323 			   (rdev->pdev->device == 0x131B)) {
3324 			rdev->config.cik.max_cu_per_sh = 4;
3325 			rdev->config.cik.max_backends_per_se = 1;
3326 		} else {
3327 			rdev->config.cik.max_cu_per_sh = 3;
3328 			rdev->config.cik.max_backends_per_se = 1;
3329 		}
3330 		rdev->config.cik.max_sh_per_se = 1;
3331 		rdev->config.cik.max_texture_channel_caches = 4;
3332 		rdev->config.cik.max_gprs = 256;
3333 		rdev->config.cik.max_gs_threads = 16;
3334 		rdev->config.cik.max_hw_contexts = 8;
3335 
3336 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3337 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3338 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3339 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3340 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3341 		break;
3342 	case CHIP_KABINI:
3343 	case CHIP_MULLINS:
3344 	default:
3345 		rdev->config.cik.max_shader_engines = 1;
3346 		rdev->config.cik.max_tile_pipes = 2;
3347 		rdev->config.cik.max_cu_per_sh = 2;
3348 		rdev->config.cik.max_sh_per_se = 1;
3349 		rdev->config.cik.max_backends_per_se = 1;
3350 		rdev->config.cik.max_texture_channel_caches = 2;
3351 		rdev->config.cik.max_gprs = 256;
3352 		rdev->config.cik.max_gs_threads = 16;
3353 		rdev->config.cik.max_hw_contexts = 8;
3354 
3355 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3356 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3357 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3358 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3359 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3360 		break;
3361 	}
3362 
3363 	/* Initialize HDP */
3364 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3365 		WREG32((0x2c14 + j), 0x00000000);
3366 		WREG32((0x2c18 + j), 0x00000000);
3367 		WREG32((0x2c1c + j), 0x00000000);
3368 		WREG32((0x2c20 + j), 0x00000000);
3369 		WREG32((0x2c24 + j), 0x00000000);
3370 	}
3371 
3372 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3373 
3374 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3375 
3376 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3377 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3378 
3379 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3380 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3381 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3382 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3383 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3384 		rdev->config.cik.mem_row_size_in_kb = 4;
3385 	/* XXX use MC settings? */
3386 	rdev->config.cik.shader_engine_tile_size = 32;
3387 	rdev->config.cik.num_gpus = 1;
3388 	rdev->config.cik.multi_gpu_tile_size = 64;
3389 
3390 	/* fix up row size */
3391 	gb_addr_config &= ~ROW_SIZE_MASK;
3392 	switch (rdev->config.cik.mem_row_size_in_kb) {
3393 	case 1:
3394 	default:
3395 		gb_addr_config |= ROW_SIZE(0);
3396 		break;
3397 	case 2:
3398 		gb_addr_config |= ROW_SIZE(1);
3399 		break;
3400 	case 4:
3401 		gb_addr_config |= ROW_SIZE(2);
3402 		break;
3403 	}
3404 
3405 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3406 	 * not have bank info, so create a custom tiling dword.
3407 	 * bits 3:0   num_pipes
3408 	 * bits 7:4   num_banks
3409 	 * bits 11:8  group_size
3410 	 * bits 15:12 row_size
3411 	 */
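	/*
	 * Worked example with hypothetical field values: 4 pipes encodes as
	 * 2 in bits 3:0; a bank field of 2, a group-size field of 1 and a
	 * row-size field of 2 then give
	 * (2 << 12) | (1 << 8) | (2 << 4) | 2 = 0x2122.
	 */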
3412 	rdev->config.cik.tile_config = 0;
3413 	switch (rdev->config.cik.num_tile_pipes) {
3414 	case 1:
3415 		rdev->config.cik.tile_config |= (0 << 0);
3416 		break;
3417 	case 2:
3418 		rdev->config.cik.tile_config |= (1 << 0);
3419 		break;
3420 	case 4:
3421 		rdev->config.cik.tile_config |= (2 << 0);
3422 		break;
3423 	case 8:
3424 	default:
3425 		/* XXX what about 12? */
3426 		rdev->config.cik.tile_config |= (3 << 0);
3427 		break;
3428 	}
3429 	rdev->config.cik.tile_config |=
3430 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3431 	rdev->config.cik.tile_config |=
3432 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3433 	rdev->config.cik.tile_config |=
3434 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3435 
3436 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3437 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3438 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3439 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3440 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3441 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3442 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3443 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3444 
3445 	cik_tiling_mode_table_init(rdev);
3446 
3447 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3448 		     rdev->config.cik.max_sh_per_se,
3449 		     rdev->config.cik.max_backends_per_se);
3450 
	/* one hweight32() per SE/SH pair; an extra per-CU inner loop here
	 * would overcount each SH by max_cu_per_sh.  Reset the counter so
	 * a re-init (e.g. on resume) doesn't accumulate stale counts.
	 */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}
3459 
3460 	/* set HW defaults for 3D engine */
3461 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3462 
3463 	WREG32(SX_DEBUG_1, 0x20);
3464 
3465 	WREG32(TA_CNTL_AUX, 0x00010000);
3466 
3467 	tmp = RREG32(SPI_CONFIG_CNTL);
3468 	tmp |= 0x03000000;
3469 	WREG32(SPI_CONFIG_CNTL, tmp);
3470 
3471 	WREG32(SQ_CONFIG, 1);
3472 
3473 	WREG32(DB_DEBUG, 0);
3474 
3475 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3476 	tmp |= 0x00000400;
3477 	WREG32(DB_DEBUG2, tmp);
3478 
3479 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3480 	tmp |= 0x00020200;
3481 	WREG32(DB_DEBUG3, tmp);
3482 
3483 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3484 	tmp |= 0x00018208;
3485 	WREG32(CB_HW_CONTROL, tmp);
3486 
3487 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3488 
3489 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3490 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3491 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3492 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3493 
3494 	WREG32(VGT_NUM_INSTANCES, 1);
3495 
3496 	WREG32(CP_PERFMON_CNTL, 0);
3497 
3498 	WREG32(SQ_CONFIG, 0);
3499 
3500 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3501 					  FORCE_EOV_MAX_REZ_CNT(255)));
3502 
3503 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3504 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3505 
3506 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3507 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3508 
3509 	tmp = RREG32(HDP_MISC_CNTL);
3510 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3511 	WREG32(HDP_MISC_CNTL, tmp);
3512 
3513 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3514 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3515 
3516 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3517 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3518 
3519 	udelay(50);
3520 }
3521 
3522 /*
 * GPU scratch register helper functions.
3524  */
3525 /**
3526  * cik_scratch_init - setup driver info for CP scratch regs
3527  *
3528  * @rdev: radeon_device pointer
3529  *
3530  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3532  * is not used by default on newer asics (r6xx+).  On newer asics,
3533  * memory buffers are used for fences rather than scratch regs.
3534  */
3535 static void cik_scratch_init(struct radeon_device *rdev)
3536 {
3537 	int i;
3538 
3539 	rdev->scratch.num_reg = 7;
3540 	rdev->scratch.reg_base = SCRATCH_REG0;
3541 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3542 		rdev->scratch.free[i] = true;
3543 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3544 	}
3545 }
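
/*
 * The resulting layout is one dword per register: SCRATCH_REG0,
 * SCRATCH_REG0 + 4, ..., SCRATCH_REG0 + 24 for the seven registers
 * reserved above, handed out and reclaimed via radeon_scratch_get()/
 * radeon_scratch_free().
 */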
3546 
3547 /**
3548  * cik_ring_test - basic gfx ring test
3549  *
3550  * @rdev: radeon_device pointer
3551  * @ring: radeon_ring structure holding ring information
3552  *
3553  * Allocate a scratch register and write to it using the gfx ring (CIK).
3554  * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume().
3556  * Returns 0 on success, error on failure.
3557  */
3558 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3559 {
3560 	uint32_t scratch;
3561 	uint32_t tmp = 0;
3562 	unsigned i;
3563 	int r;
3564 
3565 	r = radeon_scratch_get(rdev, &scratch);
3566 	if (r) {
3567 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3568 		return r;
3569 	}
3570 	WREG32(scratch, 0xCAFEDEAD);
3571 	r = radeon_ring_lock(rdev, ring, 3);
3572 	if (r) {
3573 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3574 		radeon_scratch_free(rdev, scratch);
3575 		return r;
3576 	}
3577 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3578 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3579 	radeon_ring_write(ring, 0xDEADBEEF);
3580 	radeon_ring_unlock_commit(rdev, ring);
3581 
3582 	for (i = 0; i < rdev->usec_timeout; i++) {
3583 		tmp = RREG32(scratch);
3584 		if (tmp == 0xDEADBEEF)
3585 			break;
3586 		DRM_UDELAY(1);
3587 	}
3588 	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %u usecs\n", ring->idx, i);
3590 	} else {
3591 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3592 			  ring->idx, scratch, tmp);
3593 		r = -EINVAL;
3594 	}
3595 	radeon_scratch_free(rdev, scratch);
3596 	return r;
3597 }
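
/*
 * The test boils down to a three-dword packet, schematically:
 *
 *   dw0: PACKET3(PACKET3_SET_UCONFIG_REG, 1)
 *   dw1: (scratch - PACKET3_SET_UCONFIG_REG_START) >> 2   (register offset)
 *   dw2: 0xDEADBEEF                                       (value)
 *
 * i.e. "CP, write 0xDEADBEEF into this UCONFIG register"; the CPU then
 * polls the register back to confirm the CP consumed the stream.
 */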
3598 
3599 /**
3600  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3601  *
3602  * @rdev: radeon_device pointer
3603  * @ridx: radeon ring index
3604  *
3605  * Emits an hdp flush on the cp.
3606  */
3607 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3608 				       int ridx)
3609 {
3610 	struct radeon_ring *ring = &rdev->ring[ridx];
3611 	u32 ref_and_mask;
3612 
3613 	switch (ring->idx) {
3614 	case CAYMAN_RING_TYPE_CP1_INDEX:
3615 	case CAYMAN_RING_TYPE_CP2_INDEX:
3616 	default:
3617 		switch (ring->me) {
3618 		case 0:
3619 			ref_and_mask = CP2 << ring->pipe;
3620 			break;
3621 		case 1:
3622 			ref_and_mask = CP6 << ring->pipe;
3623 			break;
3624 		default:
3625 			return;
3626 		}
3627 		break;
3628 	case RADEON_RING_TYPE_GFX_INDEX:
3629 		ref_and_mask = CP0;
3630 		break;
3631 	}
3632 
3633 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3634 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3635 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3636 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3637 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3638 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3639 	radeon_ring_write(ring, ref_and_mask);
3640 	radeon_ring_write(ring, ref_and_mask);
3641 	radeon_ring_write(ring, 0x20); /* poll interval */
3642 }
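
/*
 * WAIT_REG_MEM is used here in its write-then-poll form: the engine
 * writes ref_and_mask to GPU_HDP_FLUSH_REQ, then stalls the PFP until
 * (GPU_HDP_FLUSH_DONE & ref_and_mask) == ref_and_mask, re-checking at
 * the 0x20 poll interval.  Each ME/pipe gets its own request bit so
 * concurrent rings don't consume each other's flush acknowledgements.
 */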
3643 
3644 /**
3645  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3646  *
3647  * @rdev: radeon_device pointer
3648  * @fence: radeon fence object
3649  *
 * Emits a fence sequence number on the gfx ring and flushes
3651  * GPU caches.
3652  */
3653 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3654 			     struct radeon_fence *fence)
3655 {
3656 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3657 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3658 
3659 	/* EVENT_WRITE_EOP - flush caches, send int */
3660 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3661 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3662 				 EOP_TC_ACTION_EN |
3663 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3664 				 EVENT_INDEX(5)));
3665 	radeon_ring_write(ring, addr & 0xfffffffc);
3666 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3667 	radeon_ring_write(ring, fence->seq);
3668 	radeon_ring_write(ring, 0);
3669 	/* HDP flush */
3670 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3671 }
3672 
3673 /**
3674  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3675  *
3676  * @rdev: radeon_device pointer
3677  * @fence: radeon fence object
3678  *
 * Emits a fence sequence number on the compute ring and flushes
3680  * GPU caches.
3681  */
3682 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3683 				 struct radeon_fence *fence)
3684 {
3685 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3686 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3687 
3688 	/* RELEASE_MEM - flush caches, send int */
3689 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3690 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3691 				 EOP_TC_ACTION_EN |
3692 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3693 				 EVENT_INDEX(5)));
3694 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3695 	radeon_ring_write(ring, addr & 0xfffffffc);
3696 	radeon_ring_write(ring, upper_32_bits(addr));
3697 	radeon_ring_write(ring, fence->seq);
3698 	radeon_ring_write(ring, 0);
3699 	/* HDP flush */
3700 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3701 }
3702 
3703 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3704 			     struct radeon_ring *ring,
3705 			     struct radeon_semaphore *semaphore,
3706 			     bool emit_wait)
3707 {
3708 	uint64_t addr = semaphore->gpu_addr;
3709 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3710 
3711 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3712 	radeon_ring_write(ring, lower_32_bits(addr));
3713 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3714 
3715 	return true;
3716 }
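
/*
 * The MEM_SEMAPHORE packet carries the 64-bit semaphore address split
 * across two dwords, with the signal-vs-wait select folded into the
 * upper-address dword.  Returning true reports to the caller that the
 * engine handled the semaphore natively, so no fallback is required.
 */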
3717 
3718 /**
3719  * cik_copy_cpdma - copy pages using the CP DMA engine
3720  *
3721  * @rdev: radeon_device pointer
3722  * @src_offset: src GPU address
3723  * @dst_offset: dst GPU address
3724  * @num_gpu_pages: number of GPU pages to xfer
3725  * @fence: radeon fence object
3726  *
 * Copy GPU pages using the CP DMA engine (CIK+).
3728  * Used by the radeon ttm implementation to move pages if
3729  * registered as the asic copy callback.
3730  */
3731 int cik_copy_cpdma(struct radeon_device *rdev,
3732 		   uint64_t src_offset, uint64_t dst_offset,
3733 		   unsigned num_gpu_pages,
3734 		   struct radeon_fence **fence)
3735 {
3736 	struct radeon_semaphore *sem = NULL;
3737 	int ring_index = rdev->asic->copy.blit_ring_index;
3738 	struct radeon_ring *ring = &rdev->ring[ring_index];
3739 	u32 size_in_bytes, cur_size_in_bytes, control;
3740 	int i, num_loops;
3741 	int r = 0;
3742 
3743 	r = radeon_semaphore_create(rdev, &sem);
3744 	if (r) {
3745 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3746 		return r;
3747 	}
3748 
3749 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3750 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3751 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3752 	if (r) {
3753 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3754 		radeon_semaphore_free(rdev, &sem, NULL);
3755 		return r;
3756 	}
3757 
3758 	radeon_semaphore_sync_to(sem, *fence);
3759 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3760 
3761 	for (i = 0; i < num_loops; i++) {
3762 		cur_size_in_bytes = size_in_bytes;
3763 		if (cur_size_in_bytes > 0x1fffff)
3764 			cur_size_in_bytes = 0x1fffff;
3765 		size_in_bytes -= cur_size_in_bytes;
3766 		control = 0;
3767 		if (size_in_bytes == 0)
3768 			control |= PACKET3_DMA_DATA_CP_SYNC;
3769 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3770 		radeon_ring_write(ring, control);
3771 		radeon_ring_write(ring, lower_32_bits(src_offset));
3772 		radeon_ring_write(ring, upper_32_bits(src_offset));
3773 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3774 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3775 		radeon_ring_write(ring, cur_size_in_bytes);
3776 		src_offset += cur_size_in_bytes;
3777 		dst_offset += cur_size_in_bytes;
3778 	}
3779 
3780 	r = radeon_fence_emit(rdev, fence, ring->idx);
3781 	if (r) {
3782 		radeon_ring_unlock_undo(rdev, ring);
3783 		radeon_semaphore_free(rdev, &sem, NULL);
3784 		return r;
3785 	}
3786 
3787 	radeon_ring_unlock_commit(rdev, ring);
3788 	radeon_semaphore_free(rdev, &sem, *fence);
3789 
3790 	return r;
3791 }
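
/*
 * Worked example: copying 1024 GPU pages (4 MiB with 4 KiB pages) gives
 * size_in_bytes = 0x400000.  With the 0x1fffff-byte cap per DMA_DATA
 * packet that is DIV_ROUND_UP(0x400000, 0x1fffff) = 3 loops, so the
 * ring is locked for 3 * 7 + 18 = 39 dwords.
 */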
3792 
3793 /*
3794  * IB stuff
3795  */
3796 /**
3797  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3798  *
3799  * @rdev: radeon_device pointer
3800  * @ib: radeon indirect buffer object
3801  *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
3806  * on the gfx ring for execution by the GPU.
3807  */
3808 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3809 {
3810 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3811 	u32 header, control = INDIRECT_BUFFER_VALID;
3812 
3813 	if (ib->is_const_ib) {
3814 		/* set switch buffer packet before const IB */
3815 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3816 		radeon_ring_write(ring, 0);
3817 
3818 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3819 	} else {
3820 		u32 next_rptr;
3821 		if (ring->rptr_save_reg) {
3822 			next_rptr = ring->wptr + 3 + 4;
3823 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3824 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3825 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3826 			radeon_ring_write(ring, next_rptr);
3827 		} else if (rdev->wb.enabled) {
3828 			next_rptr = ring->wptr + 5 + 4;
3829 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3830 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3831 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3832 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3833 			radeon_ring_write(ring, next_rptr);
3834 		}
3835 
3836 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3837 	}
3838 
3839 	control |= ib->length_dw |
3840 		(ib->vm ? (ib->vm->id << 24) : 0);
3841 
3842 	radeon_ring_write(ring, header);
3843 	radeon_ring_write(ring,
3844 #ifdef __BIG_ENDIAN
3845 			  (2 << 0) |
3846 #endif
3847 			  (ib->gpu_addr & 0xFFFFFFFC));
3848 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3849 	radeon_ring_write(ring, control);
3850 }
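
/*
 * The final control dword packs the IB size in dwords into the low bits
 * and, for VM-managed IBs, the VM id into bits 31:24.  On big-endian
 * hosts the (2 << 0) swap mode OR'd into the address dword makes the CP
 * fetch the IB with 32-bit byte swapping.
 */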
3851 
3852 /**
3853  * cik_ib_test - basic gfx ring IB test
3854  *
3855  * @rdev: radeon_device pointer
3856  * @ring: radeon_ring structure holding ring information
3857  *
3858  * Allocate an IB and execute it on the gfx ring (CIK).
3859  * Provides a basic gfx ring test to verify that IBs are working.
3860  * Returns 0 on success, error on failure.
3861  */
3862 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3863 {
3864 	struct radeon_ib ib;
3865 	uint32_t scratch;
3866 	uint32_t tmp = 0;
3867 	unsigned i;
3868 	int r;
3869 
3870 	r = radeon_scratch_get(rdev, &scratch);
3871 	if (r) {
3872 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3873 		return r;
3874 	}
3875 	WREG32(scratch, 0xCAFEDEAD);
3876 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3877 	if (r) {
3878 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3879 		radeon_scratch_free(rdev, scratch);
3880 		return r;
3881 	}
3882 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3883 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3884 	ib.ptr[2] = 0xDEADBEEF;
3885 	ib.length_dw = 3;
3886 	r = radeon_ib_schedule(rdev, &ib, NULL);
3887 	if (r) {
3888 		radeon_scratch_free(rdev, scratch);
3889 		radeon_ib_free(rdev, &ib);
3890 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3891 		return r;
3892 	}
3893 	r = radeon_fence_wait(ib.fence, false);
3894 	if (r) {
3895 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3896 		radeon_scratch_free(rdev, scratch);
3897 		radeon_ib_free(rdev, &ib);
3898 		return r;
3899 	}
3900 	for (i = 0; i < rdev->usec_timeout; i++) {
3901 		tmp = RREG32(scratch);
3902 		if (tmp == 0xDEADBEEF)
3903 			break;
3904 		DRM_UDELAY(1);
3905 	}
3906 	if (i < rdev->usec_timeout) {
3907 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3908 	} else {
3909 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3910 			  scratch, tmp);
3911 		r = -EINVAL;
3912 	}
3913 	radeon_scratch_free(rdev, scratch);
3914 	radeon_ib_free(rdev, &ib);
3915 	return r;
3916 }
3917 
3918 /*
3919  * CP.
 * On CIK, gfx and compute now have independent command processors.
3921  *
3922  * GFX
3923  * Gfx consists of a single ring and can process both gfx jobs and
3924  * compute jobs.  The gfx CP consists of three microengines (ME):
3925  * PFP - Pre-Fetch Parser
3926  * ME - Micro Engine
3927  * CE - Constant Engine
3928  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3930  * used by the DE so that they can be loaded into cache in parallel
3931  * while the DE is processing state update packets.
3932  *
3933  * Compute
3934  * The compute CP consists of two microengines (ME):
3935  * MEC1 - Compute MicroEngine 1
3936  * MEC2 - Compute MicroEngine 2
3937  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3938  * The queues are exposed to userspace and are programmed directly
3939  * by the compute runtime.
3940  */
3941 /**
3942  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3943  *
3944  * @rdev: radeon_device pointer
3945  * @enable: enable or disable the MEs
3946  *
3947  * Halts or unhalts the gfx MEs.
3948  */
3949 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3950 {
3951 	if (enable)
3952 		WREG32(CP_ME_CNTL, 0);
3953 	else {
3954 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3955 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3956 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3957 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3958 	}
3959 	udelay(50);
3960 }
3961 
3962 /**
3963  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3964  *
3965  * @rdev: radeon_device pointer
3966  *
3967  * Loads the gfx PFP, ME, and CE ucode.
3968  * Returns 0 for success, -EINVAL if the ucode is not available.
3969  */
3970 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3971 {
3972 	const __be32 *fw_data;
3973 	int i;
3974 
3975 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3976 		return -EINVAL;
3977 
3978 	cik_cp_gfx_enable(rdev, false);
3979 
3980 	/* PFP */
3981 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3982 	WREG32(CP_PFP_UCODE_ADDR, 0);
3983 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3984 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3985 	WREG32(CP_PFP_UCODE_ADDR, 0);
3986 
3987 	/* CE */
3988 	fw_data = (const __be32 *)rdev->ce_fw->data;
3989 	WREG32(CP_CE_UCODE_ADDR, 0);
3990 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3991 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3992 	WREG32(CP_CE_UCODE_ADDR, 0);
3993 
3994 	/* ME */
3995 	fw_data = (const __be32 *)rdev->me_fw->data;
3996 	WREG32(CP_ME_RAM_WADDR, 0);
3997 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3998 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3999 	WREG32(CP_ME_RAM_WADDR, 0);
4000 
4001 	WREG32(CP_PFP_UCODE_ADDR, 0);
4002 	WREG32(CP_CE_UCODE_ADDR, 0);
4003 	WREG32(CP_ME_RAM_WADDR, 0);
4004 	WREG32(CP_ME_RAM_RADDR, 0);
4005 	return 0;
4006 }
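
/*
 * The upload pattern is the same for all three engines: zero the ucode
 * address register, stream the big-endian firmware words through the
 * data port (the write address auto-increments), then zero the address
 * again so execution starts from instruction 0 once the ME is unhalted.
 */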
4007 
4008 /**
4009  * cik_cp_gfx_start - start the gfx ring
4010  *
4011  * @rdev: radeon_device pointer
4012  *
4013  * Enables the ring and loads the clear state context and other
4014  * packets required to init the ring.
4015  * Returns 0 for success, error for failure.
4016  */
4017 static int cik_cp_gfx_start(struct radeon_device *rdev)
4018 {
4019 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4020 	int r, i;
4021 
4022 	/* init the CP */
4023 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4024 	WREG32(CP_ENDIAN_SWAP, 0);
4025 	WREG32(CP_DEVICE_ID, 1);
4026 
4027 	cik_cp_gfx_enable(rdev, true);
4028 
4029 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4030 	if (r) {
4031 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4032 		return r;
4033 	}
4034 
4035 	/* init the CE partitions.  CE only used for gfx on CIK */
4036 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4037 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4038 	radeon_ring_write(ring, 0xc000);
4039 	radeon_ring_write(ring, 0xc000);
4040 
4041 	/* setup clear context state */
4042 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4043 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4044 
4045 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4046 	radeon_ring_write(ring, 0x80000000);
4047 	radeon_ring_write(ring, 0x80000000);
4048 
4049 	for (i = 0; i < cik_default_size; i++)
4050 		radeon_ring_write(ring, cik_default_state[i]);
4051 
4052 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4053 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4054 
4055 	/* set clear context state */
4056 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4057 	radeon_ring_write(ring, 0);
4058 
4059 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4060 	radeon_ring_write(ring, 0x00000316);
4061 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4062 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4063 
4064 	radeon_ring_unlock_commit(rdev, ring);
4065 
4066 	return 0;
4067 }
4068 
4069 /**
4070  * cik_cp_gfx_fini - stop the gfx ring
4071  *
4072  * @rdev: radeon_device pointer
4073  *
4074  * Stop the gfx ring and tear down the driver ring
4075  * info.
4076  */
4077 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4078 {
4079 	cik_cp_gfx_enable(rdev, false);
4080 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4081 }
4082 
4083 /**
4084  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4085  *
4086  * @rdev: radeon_device pointer
4087  *
4088  * Program the location and size of the gfx ring buffer
4089  * and test it to make sure it's working.
4090  * Returns 0 for success, error for failure.
4091  */
4092 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4093 {
4094 	struct radeon_ring *ring;
4095 	u32 tmp;
4096 	u32 rb_bufsz;
4097 	u64 rb_addr;
4098 	int r;
4099 
4100 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4101 	if (rdev->family != CHIP_HAWAII)
4102 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4103 
4104 	/* Set the write pointer delay */
4105 	WREG32(CP_RB_WPTR_DELAY, 0);
4106 
4107 	/* set the RB to use vmid 0 */
4108 	WREG32(CP_RB_VMID, 0);
4109 
4110 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4111 
4112 	/* ring 0 - compute and gfx */
4113 	/* Set ring buffer size */
4114 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4115 	rb_bufsz = order_base_2(ring->ring_size / 8);
4116 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4117 #ifdef __BIG_ENDIAN
4118 	tmp |= BUF_SWAP_32BIT;
4119 #endif
4120 	WREG32(CP_RB0_CNTL, tmp);
4121 
4122 	/* Initialize the ring buffer's read and write pointers */
4123 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4124 	ring->wptr = 0;
4125 	WREG32(CP_RB0_WPTR, ring->wptr);
4126 
	/* set the wb address whether it's enabled or not */
4128 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4129 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4130 
4131 	/* scratch register shadowing is no longer supported */
4132 	WREG32(SCRATCH_UMSK, 0);
4133 
4134 	if (!rdev->wb.enabled)
4135 		tmp |= RB_NO_UPDATE;
4136 
4137 	mdelay(1);
4138 	WREG32(CP_RB0_CNTL, tmp);
4139 
4140 	rb_addr = ring->gpu_addr >> 8;
4141 	WREG32(CP_RB0_BASE, rb_addr);
4142 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4143 
4144 	/* start the ring */
4145 	cik_cp_gfx_start(rdev);
4146 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4147 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4148 	if (r) {
4149 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4150 		return r;
4151 	}
4152 
4153 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4154 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4155 
4156 	return 0;
4157 }
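
/*
 * Sizing sketch for a hypothetical 1 MiB ring: ring_size = 0x100000
 * gives rb_bufsz = order_base_2(0x100000 / 8) = 17, so CP_RB0_CNTL
 * encodes the ring as 2^17 * 8 bytes = 1 MiB, with the rptr writeback
 * block size field set from order_base_2(RADEON_GPU_PAGE_SIZE / 8) = 9.
 */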
4158 
4159 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4160 		     struct radeon_ring *ring)
4161 {
4162 	u32 rptr;
4163 
4164 	if (rdev->wb.enabled)
4165 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4166 	else
4167 		rptr = RREG32(CP_RB0_RPTR);
4168 
4169 	return rptr;
4170 }
4171 
4172 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4173 		     struct radeon_ring *ring)
4174 {
4175 	u32 wptr;
4176 
4177 	wptr = RREG32(CP_RB0_WPTR);
4178 
4179 	return wptr;
4180 }
4181 
4182 void cik_gfx_set_wptr(struct radeon_device *rdev,
4183 		      struct radeon_ring *ring)
4184 {
4185 	WREG32(CP_RB0_WPTR, ring->wptr);
4186 	(void)RREG32(CP_RB0_WPTR);
4187 }
4188 
4189 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4190 			 struct radeon_ring *ring)
4191 {
4192 	u32 rptr;
4193 
4194 	if (rdev->wb.enabled) {
4195 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4196 	} else {
4197 		mutex_lock(&rdev->srbm_mutex);
4198 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4199 		rptr = RREG32(CP_HQD_PQ_RPTR);
4200 		cik_srbm_select(rdev, 0, 0, 0, 0);
4201 		mutex_unlock(&rdev->srbm_mutex);
4202 	}
4203 
4204 	return rptr;
4205 }
4206 
4207 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4208 			 struct radeon_ring *ring)
4209 {
4210 	u32 wptr;
4211 
4212 	if (rdev->wb.enabled) {
4213 		/* XXX check if swapping is necessary on BE */
4214 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4215 	} else {
4216 		mutex_lock(&rdev->srbm_mutex);
4217 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4218 		wptr = RREG32(CP_HQD_PQ_WPTR);
4219 		cik_srbm_select(rdev, 0, 0, 0, 0);
4220 		mutex_unlock(&rdev->srbm_mutex);
4221 	}
4222 
4223 	return wptr;
4224 }
4225 
4226 void cik_compute_set_wptr(struct radeon_device *rdev,
4227 			  struct radeon_ring *ring)
4228 {
4229 	/* XXX check if swapping is necessary on BE */
4230 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4231 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4232 }
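
/*
 * Unlike the gfx ring, compute queues are kicked via doorbells: the new
 * wptr is mirrored into the writeback page for the CP to read, and the
 * doorbell write is what actually notifies the hardware of the update.
 */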
4233 
4234 /**
4235  * cik_cp_compute_enable - enable/disable the compute CP MEs
4236  *
4237  * @rdev: radeon_device pointer
4238  * @enable: enable or disable the MEs
4239  *
4240  * Halts or unhalts the compute MEs.
4241  */
4242 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4243 {
4244 	if (enable)
4245 		WREG32(CP_MEC_CNTL, 0);
4246 	else {
4247 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4248 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4249 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4250 	}
4251 	udelay(50);
4252 }
4253 
4254 /**
4255  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4256  *
4257  * @rdev: radeon_device pointer
4258  *
4259  * Loads the compute MEC1&2 ucode.
4260  * Returns 0 for success, -EINVAL if the ucode is not available.
4261  */
4262 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4263 {
4264 	const __be32 *fw_data;
4265 	int i;
4266 
4267 	if (!rdev->mec_fw)
4268 		return -EINVAL;
4269 
4270 	cik_cp_compute_enable(rdev, false);
4271 
4272 	/* MEC1 */
4273 	fw_data = (const __be32 *)rdev->mec_fw->data;
4274 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4275 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4276 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4277 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4278 
4279 	if (rdev->family == CHIP_KAVERI) {
4280 		/* MEC2 */
4281 		fw_data = (const __be32 *)rdev->mec_fw->data;
4282 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4283 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4284 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4285 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4286 	}
4287 
4288 	return 0;
4289 }
4290 
4291 /**
4292  * cik_cp_compute_start - start the compute queues
4293  *
4294  * @rdev: radeon_device pointer
4295  *
4296  * Enable the compute queues.
4297  * Returns 0 for success, error for failure.
4298  */
4299 static int cik_cp_compute_start(struct radeon_device *rdev)
4300 {
4301 	cik_cp_compute_enable(rdev, true);
4302 
4303 	return 0;
4304 }
4305 
4306 /**
4307  * cik_cp_compute_fini - stop the compute queues
4308  *
4309  * @rdev: radeon_device pointer
4310  *
4311  * Stop the compute queues and tear down the driver queue
4312  * info.
4313  */
4314 static void cik_cp_compute_fini(struct radeon_device *rdev)
4315 {
4316 	int i, idx, r;
4317 
4318 	cik_cp_compute_enable(rdev, false);
4319 
4320 	for (i = 0; i < 2; i++) {
4321 		if (i == 0)
4322 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4323 		else
4324 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4325 
4326 		if (rdev->ring[idx].mqd_obj) {
4327 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4328 			if (unlikely(r != 0))
4329 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4330 
4331 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4332 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4333 
4334 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4335 			rdev->ring[idx].mqd_obj = NULL;
4336 		}
4337 	}
4338 }
4339 
4340 static void cik_mec_fini(struct radeon_device *rdev)
4341 {
4342 	int r;
4343 
4344 	if (rdev->mec.hpd_eop_obj) {
4345 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4346 		if (unlikely(r != 0))
4347 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4348 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4349 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4350 
4351 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4352 		rdev->mec.hpd_eop_obj = NULL;
4353 	}
4354 }
4355 
4356 #define MEC_HPD_SIZE 2048
4357 
4358 static int cik_mec_init(struct radeon_device *rdev)
4359 {
4360 	int r;
4361 	u32 *hpd;
4362 
4363 	/*
4364 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4365 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4366 	 */
4367 	if (rdev->family == CHIP_KAVERI)
4368 		rdev->mec.num_mec = 2;
4369 	else
4370 		rdev->mec.num_mec = 1;
4371 	rdev->mec.num_pipe = 4;
4372 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4373 
4374 	if (rdev->mec.hpd_eop_obj == NULL) {
4375 		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4377 				     PAGE_SIZE, true,
4378 				     RADEON_GEM_DOMAIN_GTT, NULL,
4379 				     &rdev->mec.hpd_eop_obj);
4380 		if (r) {
			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4382 			return r;
4383 		}
4384 	}
4385 
4386 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4387 	if (unlikely(r != 0)) {
4388 		cik_mec_fini(rdev);
4389 		return r;
4390 	}
4391 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4392 			  &rdev->mec.hpd_eop_gpu_addr);
4393 	if (r) {
		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4395 		cik_mec_fini(rdev);
4396 		return r;
4397 	}
4398 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4399 	if (r) {
		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4401 		cik_mec_fini(rdev);
4402 		return r;
4403 	}
4404 
4405 	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4407 
4408 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4409 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4410 
4411 	return 0;
4412 }
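
/*
 * Size check: on Kaveri (2 MECs) the HPD EOP buffer comes to
 * 2 * 4 * MEC_HPD_SIZE * 2 = 32 KiB, on the 1-MEC parts to 16 KiB --
 * one MEC_HPD_SIZE * 2 slice per pipe, indexed linearly by pipe in
 * cik_cp_compute_resume().
 */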
4413 
struct hqd_registers {
4416 	u32 cp_mqd_base_addr;
4417 	u32 cp_mqd_base_addr_hi;
4418 	u32 cp_hqd_active;
4419 	u32 cp_hqd_vmid;
4420 	u32 cp_hqd_persistent_state;
4421 	u32 cp_hqd_pipe_priority;
4422 	u32 cp_hqd_queue_priority;
4423 	u32 cp_hqd_quantum;
4424 	u32 cp_hqd_pq_base;
4425 	u32 cp_hqd_pq_base_hi;
4426 	u32 cp_hqd_pq_rptr;
4427 	u32 cp_hqd_pq_rptr_report_addr;
4428 	u32 cp_hqd_pq_rptr_report_addr_hi;
4429 	u32 cp_hqd_pq_wptr_poll_addr;
4430 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4431 	u32 cp_hqd_pq_doorbell_control;
4432 	u32 cp_hqd_pq_wptr;
4433 	u32 cp_hqd_pq_control;
4434 	u32 cp_hqd_ib_base_addr;
4435 	u32 cp_hqd_ib_base_addr_hi;
4436 	u32 cp_hqd_ib_rptr;
4437 	u32 cp_hqd_ib_control;
4438 	u32 cp_hqd_iq_timer;
4439 	u32 cp_hqd_iq_rptr;
4440 	u32 cp_hqd_dequeue_request;
4441 	u32 cp_hqd_dma_offload;
4442 	u32 cp_hqd_sema_cmd;
4443 	u32 cp_hqd_msg_type;
4444 	u32 cp_hqd_atomic0_preop_lo;
4445 	u32 cp_hqd_atomic0_preop_hi;
4446 	u32 cp_hqd_atomic1_preop_lo;
4447 	u32 cp_hqd_atomic1_preop_hi;
4448 	u32 cp_hqd_hq_scheduler0;
4449 	u32 cp_hqd_hq_scheduler1;
4450 	u32 cp_mqd_control;
4451 };
4452 
struct bonaire_mqd {
4455 	u32 header;
4456 	u32 dispatch_initiator;
4457 	u32 dimensions[3];
4458 	u32 start_idx[3];
4459 	u32 num_threads[3];
4460 	u32 pipeline_stat_enable;
4461 	u32 perf_counter_enable;
4462 	u32 pgm[2];
4463 	u32 tba[2];
4464 	u32 tma[2];
4465 	u32 pgm_rsrc[2];
4466 	u32 vmid;
4467 	u32 resource_limits;
4468 	u32 static_thread_mgmt01[2];
4469 	u32 tmp_ring_size;
4470 	u32 static_thread_mgmt23[2];
4471 	u32 restart[3];
4472 	u32 thread_trace_enable;
4473 	u32 reserved1;
4474 	u32 user_data[16];
4475 	u32 vgtcs_invoke_count[2];
4476 	struct hqd_registers queue_state;
4477 	u32 dequeue_cntr;
4478 	u32 interrupt_queue[64];
4479 };
4480 
4481 /**
4482  * cik_cp_compute_resume - setup the compute queue registers
4483  *
4484  * @rdev: radeon_device pointer
4485  *
4486  * Program the compute queues and test them to make sure they
4487  * are working.
4488  * Returns 0 for success, error for failure.
4489  */
4490 static int cik_cp_compute_resume(struct radeon_device *rdev)
4491 {
4492 	int r, i, idx;
4493 	u32 tmp;
4494 	bool use_doorbell = true;
4495 	u64 hqd_gpu_addr;
4496 	u64 mqd_gpu_addr;
4497 	u64 eop_gpu_addr;
4498 	u64 wb_gpu_addr;
4499 	u32 *buf;
4500 	struct bonaire_mqd *mqd;
4501 
4502 	r = cik_cp_compute_start(rdev);
4503 	if (r)
4504 		return r;
4505 
4506 	/* fix up chicken bits */
4507 	tmp = RREG32(CP_CPF_DEBUG);
4508 	tmp |= (1 << 23);
4509 	WREG32(CP_CPF_DEBUG, tmp);
4510 
4511 	/* init the pipes */
4512 	mutex_lock(&rdev->srbm_mutex);
4513 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4514 		int me = (i < 4) ? 1 : 2;
4515 		int pipe = (i < 4) ? i : (i - 4);
4516 
4517 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4518 
4519 		cik_srbm_select(rdev, me, pipe, 0, 0);
4520 
4521 		/* write the EOP addr */
4522 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4523 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4524 
4525 		/* set the VMID assigned */
4526 		WREG32(CP_HPD_EOP_VMID, 0);
4527 
4528 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4529 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4530 		tmp &= ~EOP_SIZE_MASK;
4531 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4532 		WREG32(CP_HPD_EOP_CONTROL, tmp);
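
		/*
		 * Check on the encoding: MEC_HPD_SIZE / 8 = 256, so
		 * order_base_2() yields 8 and the hardware sees
		 * 2^(8+1) = 512 dwords = 2048 bytes, matching MEC_HPD_SIZE.
		 */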
4533 	}
4534 	cik_srbm_select(rdev, 0, 0, 0, 0);
4535 	mutex_unlock(&rdev->srbm_mutex);
4536 
4537 	/* init the queues.  Just two for now. */
4538 	for (i = 0; i < 2; i++) {
4539 		if (i == 0)
4540 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4541 		else
4542 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4543 
4544 		if (rdev->ring[idx].mqd_obj == NULL) {
4545 			r = radeon_bo_create(rdev,
4546 					     sizeof(struct bonaire_mqd),
4547 					     PAGE_SIZE, true,
4548 					     RADEON_GEM_DOMAIN_GTT, NULL,
4549 					     &rdev->ring[idx].mqd_obj);
4550 			if (r) {
4551 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4552 				return r;
4553 			}
4554 		}
4555 
4556 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4557 		if (unlikely(r != 0)) {
4558 			cik_cp_compute_fini(rdev);
4559 			return r;
4560 		}
4561 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4562 				  &mqd_gpu_addr);
4563 		if (r) {
4564 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4565 			cik_cp_compute_fini(rdev);
4566 			return r;
4567 		}
4568 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4569 		if (r) {
4570 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4571 			cik_cp_compute_fini(rdev);
4572 			return r;
4573 		}
4574 
4575 		/* init the mqd struct */
4576 		memset(buf, 0, sizeof(struct bonaire_mqd));
4577 
4578 		mqd = (struct bonaire_mqd *)buf;
4579 		mqd->header = 0xC0310800;
4580 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4581 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4582 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4583 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4584 
4585 		mutex_lock(&rdev->srbm_mutex);
4586 		cik_srbm_select(rdev, rdev->ring[idx].me,
4587 				rdev->ring[idx].pipe,
4588 				rdev->ring[idx].queue, 0);
4589 
4590 		/* disable wptr polling */
4591 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4592 		tmp &= ~WPTR_POLL_EN;
4593 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4594 
4595 		/* enable doorbell? */
4596 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4597 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4598 		if (use_doorbell)
4599 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4600 		else
4601 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4602 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4603 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4604 
4605 		/* disable the queue if it's active */
4606 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4607 		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
4609 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4610 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4611 			for (i = 0; i < rdev->usec_timeout; i++) {
4612 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4613 					break;
4614 				udelay(1);
4615 			}
4616 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4617 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4618 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4619 		}
4620 
4621 		/* set the pointer to the MQD */
4622 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4623 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4624 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4625 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4626 		/* set MQD vmid to 0 */
4627 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4628 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4629 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4630 
		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4632 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4633 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4634 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4635 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4636 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4637 
4638 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4639 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4640 		mqd->queue_state.cp_hqd_pq_control &=
4641 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4642 
4643 		mqd->queue_state.cp_hqd_pq_control |=
4644 			order_base_2(rdev->ring[idx].ring_size / 8);
4645 		mqd->queue_state.cp_hqd_pq_control |=
4646 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4647 #ifdef __BIG_ENDIAN
4648 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4649 #endif
4650 		mqd->queue_state.cp_hqd_pq_control &=
4651 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4652 		mqd->queue_state.cp_hqd_pq_control |=
4653 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4654 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4655 
4656 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4657 		if (i == 0)
4658 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4659 		else
4660 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4661 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4662 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4663 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4664 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4665 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4666 
		/* set the wb address whether it's enabled or not */
4668 		if (i == 0)
4669 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4670 		else
4671 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4672 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4673 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4674 			upper_32_bits(wb_gpu_addr) & 0xffff;
4675 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4676 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4677 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4678 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4679 
4680 		/* enable the doorbell if requested */
4681 		if (use_doorbell) {
4682 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4683 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4684 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4685 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4686 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4687 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4688 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4689 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4690 
4691 		} else {
4692 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4693 		}
4694 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4695 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4696 
4697 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4698 		rdev->ring[idx].wptr = 0;
4699 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4700 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4701 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4702 
4703 		/* set the vmid for the queue */
4704 		mqd->queue_state.cp_hqd_vmid = 0;
4705 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4706 
4707 		/* activate the queue */
4708 		mqd->queue_state.cp_hqd_active = 1;
4709 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4710 
4711 		cik_srbm_select(rdev, 0, 0, 0, 0);
4712 		mutex_unlock(&rdev->srbm_mutex);
4713 
4714 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4715 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4716 
4717 		rdev->ring[idx].ready = true;
4718 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4719 		if (r)
4720 			rdev->ring[idx].ready = false;
4721 	}
4722 
4723 	return 0;
4724 }
4725 
4726 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4727 {
4728 	cik_cp_gfx_enable(rdev, enable);
4729 	cik_cp_compute_enable(rdev, enable);
4730 }
4731 
4732 static int cik_cp_load_microcode(struct radeon_device *rdev)
4733 {
4734 	int r;
4735 
4736 	r = cik_cp_gfx_load_microcode(rdev);
4737 	if (r)
4738 		return r;
4739 	r = cik_cp_compute_load_microcode(rdev);
4740 	if (r)
4741 		return r;
4742 
4743 	return 0;
4744 }
4745 
4746 static void cik_cp_fini(struct radeon_device *rdev)
4747 {
4748 	cik_cp_gfx_fini(rdev);
4749 	cik_cp_compute_fini(rdev);
4750 }
4751 
4752 static int cik_cp_resume(struct radeon_device *rdev)
4753 {
4754 	int r;
4755 
4756 	cik_enable_gui_idle_interrupt(rdev, false);
4757 
4758 	r = cik_cp_load_microcode(rdev);
4759 	if (r)
4760 		return r;
4761 
4762 	r = cik_cp_gfx_resume(rdev);
4763 	if (r)
4764 		return r;
4765 	r = cik_cp_compute_resume(rdev);
4766 	if (r)
4767 		return r;
4768 
4769 	cik_enable_gui_idle_interrupt(rdev, true);
4770 
4771 	return 0;
4772 }
4773 
4774 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4775 {
4776 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4777 		RREG32(GRBM_STATUS));
4778 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4779 		RREG32(GRBM_STATUS2));
4780 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4781 		RREG32(GRBM_STATUS_SE0));
4782 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4783 		RREG32(GRBM_STATUS_SE1));
4784 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4785 		RREG32(GRBM_STATUS_SE2));
4786 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4787 		RREG32(GRBM_STATUS_SE3));
4788 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4789 		RREG32(SRBM_STATUS));
4790 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4791 		RREG32(SRBM_STATUS2));
4792 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4793 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4794 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4795 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4796 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4797 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4798 		 RREG32(CP_STALLED_STAT1));
4799 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4800 		 RREG32(CP_STALLED_STAT2));
4801 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4802 		 RREG32(CP_STALLED_STAT3));
4803 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4804 		 RREG32(CP_CPF_BUSY_STAT));
4805 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4806 		 RREG32(CP_CPF_STALLED_STAT1));
4807 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4808 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4809 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4810 		 RREG32(CP_CPC_STALLED_STAT1));
4811 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4812 }
4813 
4814 /**
4815  * cik_gpu_check_soft_reset - check which blocks are busy
4816  *
4817  * @rdev: radeon_device pointer
4818  *
4819  * Check which blocks are busy and return the relevant reset
4820  * mask to be used by cik_gpu_soft_reset().
4821  * Returns a mask of the blocks to be reset.
4822  */
4823 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4824 {
4825 	u32 reset_mask = 0;
4826 	u32 tmp;
4827 
4828 	/* GRBM_STATUS */
4829 	tmp = RREG32(GRBM_STATUS);
4830 	if (tmp & (PA_BUSY | SC_BUSY |
4831 		   BCI_BUSY | SX_BUSY |
4832 		   TA_BUSY | VGT_BUSY |
4833 		   DB_BUSY | CB_BUSY |
4834 		   GDS_BUSY | SPI_BUSY |
4835 		   IA_BUSY | IA_BUSY_NO_DMA))
4836 		reset_mask |= RADEON_RESET_GFX;
4837 
4838 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4839 		reset_mask |= RADEON_RESET_CP;
4840 
4841 	/* GRBM_STATUS2 */
4842 	tmp = RREG32(GRBM_STATUS2);
4843 	if (tmp & RLC_BUSY)
4844 		reset_mask |= RADEON_RESET_RLC;
4845 
4846 	/* SDMA0_STATUS_REG */
4847 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4848 	if (!(tmp & SDMA_IDLE))
4849 		reset_mask |= RADEON_RESET_DMA;
4850 
4851 	/* SDMA1_STATUS_REG */
4852 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4853 	if (!(tmp & SDMA_IDLE))
4854 		reset_mask |= RADEON_RESET_DMA1;
4855 
4856 	/* SRBM_STATUS2 */
4857 	tmp = RREG32(SRBM_STATUS2);
4858 	if (tmp & SDMA_BUSY)
4859 		reset_mask |= RADEON_RESET_DMA;
4860 
4861 	if (tmp & SDMA1_BUSY)
4862 		reset_mask |= RADEON_RESET_DMA1;
4863 
4864 	/* SRBM_STATUS */
4865 	tmp = RREG32(SRBM_STATUS);
4866 
4867 	if (tmp & IH_BUSY)
4868 		reset_mask |= RADEON_RESET_IH;
4869 
4870 	if (tmp & SEM_BUSY)
4871 		reset_mask |= RADEON_RESET_SEM;
4872 
4873 	if (tmp & GRBM_RQ_PENDING)
4874 		reset_mask |= RADEON_RESET_GRBM;
4875 
4876 	if (tmp & VMC_BUSY)
4877 		reset_mask |= RADEON_RESET_VMC;
4878 
4879 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4880 		   MCC_BUSY | MCD_BUSY))
4881 		reset_mask |= RADEON_RESET_MC;
4882 
4883 	if (evergreen_is_display_hung(rdev))
4884 		reset_mask |= RADEON_RESET_DISPLAY;
4885 
4886 	/* Skip MC reset as it's mostly likely not hung, just busy */
4887 	if (reset_mask & RADEON_RESET_MC) {
4888 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4889 		reset_mask &= ~RADEON_RESET_MC;
4890 	}
4891 
4892 	return reset_mask;
4893 }
4894 
4895 /**
4896  * cik_gpu_soft_reset - soft reset GPU
4897  *
4898  * @rdev: radeon_device pointer
4899  * @reset_mask: mask of which blocks to reset
4900  *
4901  * Soft reset the blocks specified in @reset_mask.
4902  */
4903 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4904 {
4905 	struct evergreen_mc_save save;
4906 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4907 	u32 tmp;
4908 
4909 	if (reset_mask == 0)
4910 		return;
4911 
4912 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4913 
4914 	cik_print_gpu_status_regs(rdev);
4915 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4916 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4917 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4918 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4919 
4920 	/* disable CG/PG */
4921 	cik_fini_pg(rdev);
4922 	cik_fini_cg(rdev);
4923 
4924 	/* stop the rlc */
4925 	cik_rlc_stop(rdev);
4926 
4927 	/* Disable GFX parsing/prefetching */
4928 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4929 
4930 	/* Disable MEC parsing/prefetching */
4931 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4932 
4933 	if (reset_mask & RADEON_RESET_DMA) {
4934 		/* sdma0 */
4935 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4936 		tmp |= SDMA_HALT;
4937 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4938 	}
4939 	if (reset_mask & RADEON_RESET_DMA1) {
4940 		/* sdma1 */
4941 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4942 		tmp |= SDMA_HALT;
4943 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4944 	}
4945 
4946 	evergreen_mc_stop(rdev, &save);
4947 	if (evergreen_mc_wait_for_idle(rdev)) {
4948 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4949 	}
4950 
4951 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4952 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4953 
4954 	if (reset_mask & RADEON_RESET_CP) {
4955 		grbm_soft_reset |= SOFT_RESET_CP;
4956 
4957 		srbm_soft_reset |= SOFT_RESET_GRBM;
4958 	}
4959 
4960 	if (reset_mask & RADEON_RESET_DMA)
4961 		srbm_soft_reset |= SOFT_RESET_SDMA;
4962 
4963 	if (reset_mask & RADEON_RESET_DMA1)
4964 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4965 
4966 	if (reset_mask & RADEON_RESET_DISPLAY)
4967 		srbm_soft_reset |= SOFT_RESET_DC;
4968 
4969 	if (reset_mask & RADEON_RESET_RLC)
4970 		grbm_soft_reset |= SOFT_RESET_RLC;
4971 
4972 	if (reset_mask & RADEON_RESET_SEM)
4973 		srbm_soft_reset |= SOFT_RESET_SEM;
4974 
4975 	if (reset_mask & RADEON_RESET_IH)
4976 		srbm_soft_reset |= SOFT_RESET_IH;
4977 
4978 	if (reset_mask & RADEON_RESET_GRBM)
4979 		srbm_soft_reset |= SOFT_RESET_GRBM;
4980 
4981 	if (reset_mask & RADEON_RESET_VMC)
4982 		srbm_soft_reset |= SOFT_RESET_VMC;
4983 
4984 	if (!(rdev->flags & RADEON_IS_IGP)) {
4985 		if (reset_mask & RADEON_RESET_MC)
4986 			srbm_soft_reset |= SOFT_RESET_MC;
4987 	}
4988 
4989 	if (grbm_soft_reset) {
4990 		tmp = RREG32(GRBM_SOFT_RESET);
4991 		tmp |= grbm_soft_reset;
4992 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4993 		WREG32(GRBM_SOFT_RESET, tmp);
4994 		tmp = RREG32(GRBM_SOFT_RESET);
4995 
4996 		udelay(50);
4997 
4998 		tmp &= ~grbm_soft_reset;
4999 		WREG32(GRBM_SOFT_RESET, tmp);
5000 		tmp = RREG32(GRBM_SOFT_RESET);
5001 	}
5002 
5003 	if (srbm_soft_reset) {
5004 		tmp = RREG32(SRBM_SOFT_RESET);
5005 		tmp |= srbm_soft_reset;
5006 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5007 		WREG32(SRBM_SOFT_RESET, tmp);
5008 		tmp = RREG32(SRBM_SOFT_RESET);
5009 
5010 		udelay(50);
5011 
5012 		tmp &= ~srbm_soft_reset;
5013 		WREG32(SRBM_SOFT_RESET, tmp);
5014 		tmp = RREG32(SRBM_SOFT_RESET);
5015 	}
5016 
5017 	/* Wait a little for things to settle down */
5018 	udelay(50);
5019 
5020 	evergreen_mc_resume(rdev, &save);
5021 	udelay(50);
5022 
5023 	cik_print_gpu_status_regs(rdev);
5024 }
5025 
5026 struct kv_reset_save_regs {
5027 	u32 gmcon_reng_execute;
5028 	u32 gmcon_misc;
5029 	u32 gmcon_misc3;
5030 };
5031 
5032 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5033 				   struct kv_reset_save_regs *save)
5034 {
5035 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5036 	save->gmcon_misc = RREG32(GMCON_MISC);
5037 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5038 
5039 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5040 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5041 						STCTRL_STUTTER_EN));
5042 }
5043 
5044 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5045 				      struct kv_reset_save_regs *save)
5046 {
5047 	int i;
5048 
5049 	WREG32(GMCON_PGFSM_WRITE, 0);
5050 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5051 
5052 	for (i = 0; i < 5; i++)
5053 		WREG32(GMCON_PGFSM_WRITE, 0);
5054 
5055 	WREG32(GMCON_PGFSM_WRITE, 0);
5056 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5057 
5058 	for (i = 0; i < 5; i++)
5059 		WREG32(GMCON_PGFSM_WRITE, 0);
5060 
5061 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5062 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5063 
5064 	for (i = 0; i < 5; i++)
5065 		WREG32(GMCON_PGFSM_WRITE, 0);
5066 
5067 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5068 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5069 
5070 	for (i = 0; i < 5; i++)
5071 		WREG32(GMCON_PGFSM_WRITE, 0);
5072 
5073 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5074 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5075 
5076 	for (i = 0; i < 5; i++)
5077 		WREG32(GMCON_PGFSM_WRITE, 0);
5078 
5079 	WREG32(GMCON_PGFSM_WRITE, 0);
5080 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5081 
5082 	for (i = 0; i < 5; i++)
5083 		WREG32(GMCON_PGFSM_WRITE, 0);
5084 
5085 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5086 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5087 
5088 	for (i = 0; i < 5; i++)
5089 		WREG32(GMCON_PGFSM_WRITE, 0);
5090 
5091 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5092 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5093 
5094 	for (i = 0; i < 5; i++)
5095 		WREG32(GMCON_PGFSM_WRITE, 0);
5096 
5097 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5098 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5099 
5100 	for (i = 0; i < 5; i++)
5101 		WREG32(GMCON_PGFSM_WRITE, 0);
5102 
5103 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5104 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5105 
5106 	for (i = 0; i < 5; i++)
5107 		WREG32(GMCON_PGFSM_WRITE, 0);
5108 
5109 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5110 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5111 
5112 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5113 	WREG32(GMCON_MISC, save->gmcon_misc);
5114 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5115 }
5116 
5117 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5118 {
5119 	struct evergreen_mc_save save;
5120 	struct kv_reset_save_regs kv_save = { 0 };
5121 	u32 tmp, i;
5122 
5123 	dev_info(rdev->dev, "GPU pci config reset\n");
5124 
5125 	/* disable dpm? */
5126 
5127 	/* disable cg/pg */
5128 	cik_fini_pg(rdev);
5129 	cik_fini_cg(rdev);
5130 
5131 	/* Disable GFX parsing/prefetching */
5132 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5133 
5134 	/* Disable MEC parsing/prefetching */
5135 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5136 
5137 	/* sdma0 */
5138 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5139 	tmp |= SDMA_HALT;
5140 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5141 	/* sdma1 */
5142 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5143 	tmp |= SDMA_HALT;
5144 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5145 	/* XXX other engines? */
5146 
5147 	/* halt the rlc, disable cp internal ints */
5148 	cik_rlc_stop(rdev);
5149 
5150 	udelay(50);
5151 
5152 	/* disable mem access */
5153 	evergreen_mc_stop(rdev, &save);
5154 	if (evergreen_mc_wait_for_idle(rdev)) {
5155 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5156 	}
5157 
5158 	if (rdev->flags & RADEON_IS_IGP)
5159 		kv_save_regs_for_reset(rdev, &kv_save);
5160 
5161 	/* disable BM */
5162 	pci_clear_master(rdev->pdev);
5163 	/* reset */
5164 	radeon_pci_config_reset(rdev);
5165 
5166 	udelay(100);
5167 
5168 	/* wait for asic to come out of reset */
5169 	for (i = 0; i < rdev->usec_timeout; i++) {
5170 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5171 			break;
5172 		udelay(1);
5173 	}
5174 
5175 	/* does asic init need to be run first??? */
5176 	if (rdev->flags & RADEON_IS_IGP)
5177 		kv_restore_regs_for_reset(rdev, &kv_save);
5178 }
5179 
5180 /**
5181  * cik_asic_reset - soft reset GPU
5182  *
5183  * @rdev: radeon_device pointer
5184  *
5185  * Look up which blocks are hung and attempt
5186  * to reset them.
5187  * Returns 0 for success.
5188  */
5189 int cik_asic_reset(struct radeon_device *rdev)
5190 {
5191 	u32 reset_mask;
5192 
5193 	reset_mask = cik_gpu_check_soft_reset(rdev);
5194 
5195 	if (reset_mask)
5196 		r600_set_bios_scratch_engine_hung(rdev, true);
5197 
5198 	/* try soft reset */
5199 	cik_gpu_soft_reset(rdev, reset_mask);
5200 
5201 	reset_mask = cik_gpu_check_soft_reset(rdev);
5202 
5203 	/* try pci config reset */
5204 	if (reset_mask && radeon_hard_reset)
5205 		cik_gpu_pci_config_reset(rdev);
5206 
5207 	reset_mask = cik_gpu_check_soft_reset(rdev);
5208 
5209 	if (!reset_mask)
5210 		r600_set_bios_scratch_engine_hung(rdev, false);
5211 
5212 	return 0;
5213 }
5214 
5215 /**
5216  * cik_gfx_is_lockup - check if the 3D engine is locked up
5217  *
5218  * @rdev: radeon_device pointer
5219  * @ring: radeon_ring structure holding ring information
5220  *
5221  * Check if the 3D engine is locked up (CIK).
5222  * Returns true if the engine is locked, false if not.
5223  */
5224 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5225 {
5226 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5227 
5228 	if (!(reset_mask & (RADEON_RESET_GFX |
5229 			    RADEON_RESET_COMPUTE |
5230 			    RADEON_RESET_CP))) {
5231 		radeon_ring_lockup_update(rdev, ring);
5232 		return false;
5233 	}
5234 	return radeon_ring_test_lockup(rdev, ring);
5235 }
5236 
5237 /* MC */
5238 /**
5239  * cik_mc_program - program the GPU memory controller
5240  *
5241  * @rdev: radeon_device pointer
5242  *
5243  * Set the location of vram, gart, and AGP in the GPU's
5244  * physical address space (CIK).
5245  */
5246 static void cik_mc_program(struct radeon_device *rdev)
5247 {
5248 	struct evergreen_mc_save save;
5249 	u32 tmp;
5250 	int i, j;
5251 
5252 	/* Initialize HDP */
5253 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5254 		WREG32((0x2c14 + j), 0x00000000);
5255 		WREG32((0x2c18 + j), 0x00000000);
5256 		WREG32((0x2c1c + j), 0x00000000);
5257 		WREG32((0x2c20 + j), 0x00000000);
5258 		WREG32((0x2c24 + j), 0x00000000);
5259 	}
5260 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5261 
5262 	evergreen_mc_stop(rdev, &save);
5263 	if (radeon_mc_wait_for_idle(rdev)) {
5264 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5265 	}
5266 	/* Lockout access through VGA aperture*/
5267 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5268 	/* Update configuration */
5269 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5270 	       rdev->mc.vram_start >> 12);
5271 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5272 	       rdev->mc.vram_end >> 12);
5273 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5274 	       rdev->vram_scratch.gpu_addr >> 12);
5275 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5276 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5277 	WREG32(MC_VM_FB_LOCATION, tmp);
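	/* worked example (illustrative arithmetic only): 1GB of vram at
	 * offset 0 gives vram_start = 0x00000000 and vram_end = 0x3FFFFFFF,
	 * so tmp = (0x003F << 16) | 0x0000 = 0x003F0000 (base/top in 16MB units)
	 */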
5278 	/* XXX double check these! */
5279 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5280 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5281 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5282 	WREG32(MC_VM_AGP_BASE, 0);
5283 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5284 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5285 	if (radeon_mc_wait_for_idle(rdev)) {
5286 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5287 	}
5288 	evergreen_mc_resume(rdev, &save);
5289 	/* we need to own VRAM, so turn off the VGA renderer here
5290 	 * to stop it overwriting our objects */
5291 	rv515_vga_render_disable(rdev);
5292 }
5293 
5294 /**
5295  * cik_mc_init - initialize the memory controller driver params
5296  *
5297  * @rdev: radeon_device pointer
5298  *
5299  * Look up the amount of vram, vram width, and decide how to place
5300  * vram and gart within the GPU's physical address space (CIK).
5301  * Returns 0 for success.
5302  */
5303 static int cik_mc_init(struct radeon_device *rdev)
5304 {
5305 	u32 tmp;
5306 	int chansize, numchan;
5307 
5308 	/* Get VRAM information */
5309 	rdev->mc.vram_is_ddr = true;
5310 	tmp = RREG32(MC_ARB_RAMCFG);
5311 	if (tmp & CHANSIZE_MASK) {
5312 		chansize = 64;
5313 	} else {
5314 		chansize = 32;
5315 	}
5316 	tmp = RREG32(MC_SHARED_CHMAP);
5317 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5318 	case 0:
5319 	default:
5320 		numchan = 1;
5321 		break;
5322 	case 1:
5323 		numchan = 2;
5324 		break;
5325 	case 2:
5326 		numchan = 4;
5327 		break;
5328 	case 3:
5329 		numchan = 8;
5330 		break;
5331 	case 4:
5332 		numchan = 3;
5333 		break;
5334 	case 5:
5335 		numchan = 6;
5336 		break;
5337 	case 6:
5338 		numchan = 10;
5339 		break;
5340 	case 7:
5341 		numchan = 12;
5342 		break;
5343 	case 8:
5344 		numchan = 16;
5345 		break;
5346 	}
5347 	rdev->mc.vram_width = numchan * chansize;
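	/* e.g., 4 channels x 64-bit chansize yields a 256-bit memory
	 * interface (illustrative arithmetic only)
	 */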
5348 	/* Could the aperture size report 0? */
5349 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5350 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5351 	/* size in MB (CONFIG_MEMSIZE reports MB, as on SI) */
5352 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5353 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5354 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5355 	si_vram_gtt_location(rdev, &rdev->mc);
5356 	radeon_update_bandwidth_info(rdev);
5357 
5358 	return 0;
5359 }
5360 
5361 /*
5362  * GART
5363  * VMID 0 covers the physical GPU addresses used by the kernel.
5364  * VMIDs 1-15 are used for userspace clients and are handled
5365  * by the radeon vm/hsa code.
5366  */
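/* Illustrative note (sketch only, not driver code): the per-VMID page
 * table base registers live in two banks, so a helper that picks the
 * right one could look like this (vmid_pt_base_reg is a hypothetical
 * name):
 *
 *	static u32 vmid_pt_base_reg(unsigned int vmid)
 *	{
 *		if (vmid < 8)
 *			return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
 *		return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
 *	}
 *
 * cik_pcie_gart_enable() and cik_vm_flush() below open-code this mapping.
 */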
5367 /**
5368  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5369  *
5370  * @rdev: radeon_device pointer
5371  *
5372  * Flush the TLB for the VMID 0 page table (CIK).
5373  */
5374 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5375 {
5376 	/* flush hdp cache */
5377 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5378 
5379 	/* bits 0-15 are the VM contexts 0-15 */
5380 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5381 }
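/* Note (illustrative, not driver code): since each bit of
 * VM_INVALIDATE_REQUEST selects one VM context, a single VMID could be
 * flushed with:
 *
 *	WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
 *
 * cik_vm_flush() further down emits exactly this write through the CP ring.
 */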
5382 
5383 /**
5384  * cik_pcie_gart_enable - gart enable
5385  *
5386  * @rdev: radeon_device pointer
5387  *
5388  * This sets up the TLBs, programs the page tables for VMID0,
5389  * sets up the hw for VMIDs 1-15 which are allocated on
5390  * demand, and sets up the global locations for the LDS, GDS,
5391  * and GPUVM for FSA64 clients (CIK).
5392  * Returns 0 for success, errors for failure.
5393  */
5394 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5395 {
5396 	int r, i;
5397 
5398 	if (rdev->gart.robj == NULL) {
5399 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5400 		return -EINVAL;
5401 	}
5402 	r = radeon_gart_table_vram_pin(rdev);
5403 	if (r)
5404 		return r;
5405 	radeon_gart_restore(rdev);
5406 	/* Setup TLB control */
5407 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5408 	       (0xA << 7) |
5409 	       ENABLE_L1_TLB |
5410 	       ENABLE_L1_FRAGMENT_PROCESSING |
5411 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5412 	       ENABLE_ADVANCED_DRIVER_MODEL |
5413 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5414 	/* Setup L2 cache */
5415 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5416 	       ENABLE_L2_FRAGMENT_PROCESSING |
5417 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5418 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5419 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5420 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5421 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5422 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5423 	       BANK_SELECT(4) |
5424 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5425 	/* setup context0 */
5426 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5427 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5428 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5429 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5430 			(u32)(rdev->dummy_page.addr >> 12));
5431 	WREG32(VM_CONTEXT0_CNTL2, 0);
5432 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5433 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5434 
5435 	WREG32(0x15D4, 0);
5436 	WREG32(0x15D8, 0);
5437 	WREG32(0x15DC, 0);
5438 
5439 	/* empty contexts 1-15 */
5440 	/* FIXME: start with 4GB; once a two-level page table is in use,
5441 	 * switch to the full VM address space
5442 	 */
5443 	/* set vm size, must be a multiple of 4 */
5444 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5445 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5446 	for (i = 1; i < 16; i++) {
5447 		if (i < 8)
5448 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5449 			       rdev->gart.table_addr >> 12);
5450 		else
5451 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5452 			       rdev->gart.table_addr >> 12);
5453 	}
5454 
5455 	/* enable contexts 1-15 */
5456 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5457 	       (u32)(rdev->dummy_page.addr >> 12));
5458 	WREG32(VM_CONTEXT1_CNTL2, 4);
5459 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5460 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5461 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5462 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5463 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5464 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5465 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5466 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5467 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5468 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5469 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5470 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5471 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5472 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5473 
5474 	if (rdev->family == CHIP_KAVERI) {
5475 		u32 tmp = RREG32(CHUB_CONTROL);
5476 		tmp &= ~BYPASS_VM;
5477 		WREG32(CHUB_CONTROL, tmp);
5478 	}
5479 
5480 	/* XXX SH_MEM regs */
5481 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5482 	mutex_lock(&rdev->srbm_mutex);
5483 	for (i = 0; i < 16; i++) {
5484 		cik_srbm_select(rdev, 0, 0, 0, i);
5485 		/* CP and shaders */
5486 		WREG32(SH_MEM_CONFIG, 0);
5487 		WREG32(SH_MEM_APE1_BASE, 1);
5488 		WREG32(SH_MEM_APE1_LIMIT, 0);
5489 		WREG32(SH_MEM_BASES, 0);
5490 		/* SDMA GFX */
5491 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5492 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5493 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5494 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5495 		/* XXX SDMA RLC - todo */
5496 	}
5497 	cik_srbm_select(rdev, 0, 0, 0, 0);
5498 	mutex_unlock(&rdev->srbm_mutex);
5499 
5500 	cik_pcie_gart_tlb_flush(rdev);
5501 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5502 		 (unsigned)(rdev->mc.gtt_size >> 20),
5503 		 (unsigned long long)rdev->gart.table_addr);
5504 	rdev->gart.ready = true;
5505 	return 0;
5506 }
5507 
5508 /**
5509  * cik_pcie_gart_disable - gart disable
5510  *
5511  * @rdev: radeon_device pointer
5512  *
5513  * This disables all VM page tables (CIK).
5514  */
5515 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5516 {
5517 	/* Disable all tables */
5518 	WREG32(VM_CONTEXT0_CNTL, 0);
5519 	WREG32(VM_CONTEXT1_CNTL, 0);
5520 	/* Setup TLB control */
5521 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5522 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5523 	/* Setup L2 cache */
5524 	WREG32(VM_L2_CNTL,
5525 	       ENABLE_L2_FRAGMENT_PROCESSING |
5526 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5527 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5528 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5529 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5530 	WREG32(VM_L2_CNTL2, 0);
5531 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5532 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5533 	radeon_gart_table_vram_unpin(rdev);
5534 }
5535 
5536 /**
5537  * cik_pcie_gart_fini - vm fini callback
5538  *
5539  * @rdev: radeon_device pointer
5540  *
5541  * Tears down the driver GART/VM setup (CIK).
5542  */
5543 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5544 {
5545 	cik_pcie_gart_disable(rdev);
5546 	radeon_gart_table_vram_free(rdev);
5547 	radeon_gart_fini(rdev);
5548 }
5549 
5550 /* vm parser */
5551 /**
5552  * cik_ib_parse - vm ib_parse callback
5553  *
5554  * @rdev: radeon_device pointer
5555  * @ib: indirect buffer pointer
5556  *
5557  * CIK uses hw IB checking so this is a nop (CIK).
5558  */
5559 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5560 {
5561 	return 0;
5562 }
5563 
5564 /*
5565  * vm
5566  * VMID 0 covers the physical GPU addresses used by the kernel.
5567  * VMIDs 1-15 are used for userspace clients and are handled
5568  * by the radeon vm/hsa code.
5569  */
5570 /**
5571  * cik_vm_init - cik vm init callback
5572  *
5573  * @rdev: radeon_device pointer
5574  *
5575  * Inits CIK-specific VM parameters (number of VMs, base of vram for
5576  * VMIDs 1-15) (CIK).
5577  * Returns 0 for success.
5578  */
5579 int cik_vm_init(struct radeon_device *rdev)
5580 {
5581 	/* number of VMs */
5582 	rdev->vm_manager.nvm = 16;
5583 	/* base offset of vram pages */
5584 	if (rdev->flags & RADEON_IS_IGP) {
5585 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5586 		tmp <<= 22;
5587 		rdev->vm_manager.vram_base_offset = tmp;
5588 	} else
5589 		rdev->vm_manager.vram_base_offset = 0;
5590 
5591 	return 0;
5592 }
5593 
5594 /**
5595  * cik_vm_fini - cik vm fini callback
5596  *
5597  * @rdev: radeon_device pointer
5598  *
5599  * Tear down any asic specific VM setup (CIK).
5600  */
5601 void cik_vm_fini(struct radeon_device *rdev)
5602 {
5603 }
5604 
5605 /**
5606  * cik_vm_decode_fault - print human readable fault info
5607  *
5608  * @rdev: radeon_device pointer
5609  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5610  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5611  *
5612  * Print human readable fault information (CIK).
5613  */
5614 static void cik_vm_decode_fault(struct radeon_device *rdev,
5615 				u32 status, u32 addr, u32 mc_client)
5616 {
5617 	u32 mc_id;
5618 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5619 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5620 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5621 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5622 
5623 	if (rdev->family == CHIP_HAWAII)
5624 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5625 	else
5626 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5627 
5628 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5629 	       protections, vmid, addr,
5630 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5631 	       block, mc_client, mc_id);
5632 }
5633 
5634 /**
5635  * cik_vm_flush - cik vm flush using the CP
5636  *
5637  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
5638  *
5639  * Update the page table base and flush the VM TLB
5640  * using the CP (CIK).
5641  */
5642 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5643 {
5644 	struct radeon_ring *ring = &rdev->ring[ridx];
5645 
5646 	if (vm == NULL)
5647 		return;
5648 
5649 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5650 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5651 				 WRITE_DATA_DST_SEL(0)));
5652 	if (vm->id < 8) {
5653 		radeon_ring_write(ring,
5654 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5655 	} else {
5656 		radeon_ring_write(ring,
5657 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5658 	}
5659 	radeon_ring_write(ring, 0);
5660 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5661 
5662 	/* update SH_MEM_* regs */
5663 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5664 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5665 				 WRITE_DATA_DST_SEL(0)));
5666 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5667 	radeon_ring_write(ring, 0);
5668 	radeon_ring_write(ring, VMID(vm->id));
5669 
5670 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5671 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5672 				 WRITE_DATA_DST_SEL(0)));
5673 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5674 	radeon_ring_write(ring, 0);
5675 
5676 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5677 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5678 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5679 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5680 
5681 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5682 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5683 				 WRITE_DATA_DST_SEL(0)));
5684 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5685 	radeon_ring_write(ring, 0);
5686 	radeon_ring_write(ring, VMID(0));
5687 
5688 	/* HDP flush */
5689 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
5690 
5691 	/* bits 0-15 are the VM contexts 0-15 */
5692 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5693 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5694 				 WRITE_DATA_DST_SEL(0)));
5695 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5696 	radeon_ring_write(ring, 0);
5697 	radeon_ring_write(ring, 1 << vm->id);
5698 
5699 	/* compute doesn't have PFP */
5700 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5701 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5702 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5703 		radeon_ring_write(ring, 0x0);
5704 	}
5705 }
5706 
5707 /*
5708  * RLC
5709  * The RLC is a multi-purpose microengine that handles a
5710  * variety of functions, the most important of which is
5711  * the interrupt controller.
5712  */
5713 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5714 					  bool enable)
5715 {
5716 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5717 
5718 	if (enable)
5719 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5720 	else
5721 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5722 	WREG32(CP_INT_CNTL_RING0, tmp);
5723 }
5724 
5725 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5726 {
5727 	u32 tmp;
5728 
5729 	tmp = RREG32(RLC_LB_CNTL);
5730 	if (enable)
5731 		tmp |= LOAD_BALANCE_ENABLE;
5732 	else
5733 		tmp &= ~LOAD_BALANCE_ENABLE;
5734 	WREG32(RLC_LB_CNTL, tmp);
5735 }
5736 
5737 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5738 {
5739 	u32 i, j, k;
5740 	u32 mask;
5741 
5742 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5743 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5744 			cik_select_se_sh(rdev, i, j);
5745 			for (k = 0; k < rdev->usec_timeout; k++) {
5746 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5747 					break;
5748 				udelay(1);
5749 			}
5750 		}
5751 	}
5752 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5753 
5754 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5755 	for (k = 0; k < rdev->usec_timeout; k++) {
5756 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5757 			break;
5758 		udelay(1);
5759 	}
5760 }
5761 
5762 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5763 {
5764 	u32 tmp;
5765 
5766 	tmp = RREG32(RLC_CNTL);
5767 	if (tmp != rlc)
5768 		WREG32(RLC_CNTL, rlc);
5769 }
5770 
5771 static u32 cik_halt_rlc(struct radeon_device *rdev)
5772 {
5773 	u32 data, orig;
5774 
5775 	orig = data = RREG32(RLC_CNTL);
5776 
5777 	if (data & RLC_ENABLE) {
5778 		u32 i;
5779 
5780 		data &= ~RLC_ENABLE;
5781 		WREG32(RLC_CNTL, data);
5782 
5783 		for (i = 0; i < rdev->usec_timeout; i++) {
5784 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5785 				break;
5786 			udelay(1);
5787 		}
5788 
5789 		cik_wait_for_rlc_serdes(rdev);
5790 	}
5791 
5792 	return orig;
5793 }
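/* Typical halt/restore pairing, as used by the clockgating code below
 * (sketch only):
 *
 *	tmp = cik_halt_rlc(rdev);
 *	... reprogram the RLC_SERDES_* registers ...
 *	cik_update_rlc(rdev, tmp);
 *
 * cik_update_rlc() only rewrites RLC_CNTL if the value actually changed.
 */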
5794 
5795 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5796 {
5797 	u32 tmp, i, mask;
5798 
5799 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5800 	WREG32(RLC_GPR_REG2, tmp);
5801 
5802 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5803 	for (i = 0; i < rdev->usec_timeout; i++) {
5804 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5805 			break;
5806 		udelay(1);
5807 	}
5808 
5809 	for (i = 0; i < rdev->usec_timeout; i++) {
5810 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5811 			break;
5812 		udelay(1);
5813 	}
5814 }
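/* The enter/exit pair is meant to bracket updates that must not race
 * with the RLC (sketch only, assuming the caller holds any needed locks):
 *
 *	cik_enter_rlc_safe_mode(rdev);
 *	... touch RLC-managed state ...
 *	cik_exit_rlc_safe_mode(rdev);
 */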
5815 
5816 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5817 {
5818 	u32 tmp;
5819 
5820 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5821 	WREG32(RLC_GPR_REG2, tmp);
5822 }
5823 
5824 /**
5825  * cik_rlc_stop - stop the RLC ME
5826  *
5827  * @rdev: radeon_device pointer
5828  *
5829  * Halt the RLC ME (MicroEngine) (CIK).
5830  */
5831 static void cik_rlc_stop(struct radeon_device *rdev)
5832 {
5833 	WREG32(RLC_CNTL, 0);
5834 
5835 	cik_enable_gui_idle_interrupt(rdev, false);
5836 
5837 	cik_wait_for_rlc_serdes(rdev);
5838 }
5839 
5840 /**
5841  * cik_rlc_start - start the RLC ME
5842  *
5843  * @rdev: radeon_device pointer
5844  *
5845  * Unhalt the RLC ME (MicroEngine) (CIK).
5846  */
5847 static void cik_rlc_start(struct radeon_device *rdev)
5848 {
5849 	WREG32(RLC_CNTL, RLC_ENABLE);
5850 
5851 	cik_enable_gui_idle_interrupt(rdev, true);
5852 
5853 	udelay(50);
5854 }
5855 
5856 /**
5857  * cik_rlc_resume - setup the RLC hw
5858  *
5859  * @rdev: radeon_device pointer
5860  *
5861  * Initialize the RLC registers, load the ucode,
5862  * and start the RLC (CIK).
5863  * Returns 0 for success, -EINVAL if the ucode is not available.
5864  */
5865 static int cik_rlc_resume(struct radeon_device *rdev)
5866 {
5867 	u32 i, size, tmp;
5868 	const __be32 *fw_data;
5869 
5870 	if (!rdev->rlc_fw)
5871 		return -EINVAL;
5872 
5873 	switch (rdev->family) {
5874 	case CHIP_BONAIRE:
5875 	case CHIP_HAWAII:
5876 	default:
5877 		size = BONAIRE_RLC_UCODE_SIZE;
5878 		break;
5879 	case CHIP_KAVERI:
5880 		size = KV_RLC_UCODE_SIZE;
5881 		break;
5882 	case CHIP_KABINI:
5883 		size = KB_RLC_UCODE_SIZE;
5884 		break;
5885 	case CHIP_MULLINS:
5886 		size = ML_RLC_UCODE_SIZE;
5887 		break;
5888 	}
5889 
5890 	cik_rlc_stop(rdev);
5891 
5892 	/* disable CG */
5893 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5894 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5895 
5896 	si_rlc_reset(rdev);
5897 
5898 	cik_init_pg(rdev);
5899 
5900 	cik_init_cg(rdev);
5901 
5902 	WREG32(RLC_LB_CNTR_INIT, 0);
5903 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5904 
5905 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5906 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5907 	WREG32(RLC_LB_PARAMS, 0x00600408);
5908 	WREG32(RLC_LB_CNTL, 0x80000004);
5909 
5910 	WREG32(RLC_MC_CNTL, 0);
5911 	WREG32(RLC_UCODE_CNTL, 0);
5912 
5913 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5914 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5915 	for (i = 0; i < size; i++)
5916 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5917 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5918 
5919 	/* XXX - find out what chips support lbpw */
5920 	cik_enable_lbpw(rdev, false);
5921 
5922 	if (rdev->family == CHIP_BONAIRE)
5923 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5924 
5925 	cik_rlc_start(rdev);
5926 
5927 	return 0;
5928 }
5929 
5930 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5931 {
5932 	u32 data, orig, tmp, tmp2;
5933 
5934 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5935 
5936 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5937 		cik_enable_gui_idle_interrupt(rdev, true);
5938 
5939 		tmp = cik_halt_rlc(rdev);
5940 
5941 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5942 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5943 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5944 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5945 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5946 
5947 		cik_update_rlc(rdev, tmp);
5948 
5949 		data |= CGCG_EN | CGLS_EN;
5950 	} else {
5951 		cik_enable_gui_idle_interrupt(rdev, false);
5952 
5953 		RREG32(CB_CGTT_SCLK_CTRL);
5954 		RREG32(CB_CGTT_SCLK_CTRL);
5955 		RREG32(CB_CGTT_SCLK_CTRL);
5956 		RREG32(CB_CGTT_SCLK_CTRL);
5957 
5958 		data &= ~(CGCG_EN | CGLS_EN);
5959 	}
5960 
5961 	if (orig != data)
5962 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5963 
5964 }
5965 
5966 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5967 {
5968 	u32 data, orig, tmp = 0;
5969 
5970 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5971 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5972 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5973 				orig = data = RREG32(CP_MEM_SLP_CNTL);
5974 				data |= CP_MEM_LS_EN;
5975 				if (orig != data)
5976 					WREG32(CP_MEM_SLP_CNTL, data);
5977 			}
5978 		}
5979 
5980 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5981 		data &= 0xfffffffd;
5982 		if (orig != data)
5983 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5984 
5985 		tmp = cik_halt_rlc(rdev);
5986 
5987 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5988 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5989 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5990 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5991 		WREG32(RLC_SERDES_WR_CTRL, data);
5992 
5993 		cik_update_rlc(rdev, tmp);
5994 
5995 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5996 			orig = data = RREG32(CGTS_SM_CTRL_REG);
5997 			data &= ~SM_MODE_MASK;
5998 			data |= SM_MODE(0x2);
5999 			data |= SM_MODE_ENABLE;
6000 			data &= ~CGTS_OVERRIDE;
6001 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6002 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6003 				data &= ~CGTS_LS_OVERRIDE;
6004 			data &= ~ON_MONITOR_ADD_MASK;
6005 			data |= ON_MONITOR_ADD_EN;
6006 			data |= ON_MONITOR_ADD(0x96);
6007 			if (orig != data)
6008 				WREG32(CGTS_SM_CTRL_REG, data);
6009 		}
6010 	} else {
6011 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6012 		data |= 0x00000002;
6013 		if (orig != data)
6014 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6015 
6016 		data = RREG32(RLC_MEM_SLP_CNTL);
6017 		if (data & RLC_MEM_LS_EN) {
6018 			data &= ~RLC_MEM_LS_EN;
6019 			WREG32(RLC_MEM_SLP_CNTL, data);
6020 		}
6021 
6022 		data = RREG32(CP_MEM_SLP_CNTL);
6023 		if (data & CP_MEM_LS_EN) {
6024 			data &= ~CP_MEM_LS_EN;
6025 			WREG32(CP_MEM_SLP_CNTL, data);
6026 		}
6027 
6028 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6029 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6030 		if (orig != data)
6031 			WREG32(CGTS_SM_CTRL_REG, data);
6032 
6033 		tmp = cik_halt_rlc(rdev);
6034 
6035 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6036 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6037 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6038 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6039 		WREG32(RLC_SERDES_WR_CTRL, data);
6040 
6041 		cik_update_rlc(rdev, tmp);
6042 	}
6043 }
6044 
6045 static const u32 mc_cg_registers[] =
6046 {
6047 	MC_HUB_MISC_HUB_CG,
6048 	MC_HUB_MISC_SIP_CG,
6049 	MC_HUB_MISC_VM_CG,
6050 	MC_XPB_CLK_GAT,
6051 	ATC_MISC_CG,
6052 	MC_CITF_MISC_WR_CG,
6053 	MC_CITF_MISC_RD_CG,
6054 	MC_CITF_MISC_VM_CG,
6055 	VM_L2_CG,
6056 };
6057 
6058 static void cik_enable_mc_ls(struct radeon_device *rdev,
6059 			     bool enable)
6060 {
6061 	int i;
6062 	u32 orig, data;
6063 
6064 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6065 		orig = data = RREG32(mc_cg_registers[i]);
6066 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6067 			data |= MC_LS_ENABLE;
6068 		else
6069 			data &= ~MC_LS_ENABLE;
6070 		if (data != orig)
6071 			WREG32(mc_cg_registers[i], data);
6072 	}
6073 }
6074 
6075 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6076 			       bool enable)
6077 {
6078 	int i;
6079 	u32 orig, data;
6080 
6081 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6082 		orig = data = RREG32(mc_cg_registers[i]);
6083 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6084 			data |= MC_CG_ENABLE;
6085 		else
6086 			data &= ~MC_CG_ENABLE;
6087 		if (data != orig)
6088 			WREG32(mc_cg_registers[i], data);
6089 	}
6090 }
6091 
6092 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6093 				 bool enable)
6094 {
6095 	u32 orig, data;
6096 
6097 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6098 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6099 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6100 	} else {
6101 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6102 		data |= 0xff000000;
6103 		if (data != orig)
6104 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6105 
6106 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6107 		data |= 0xff000000;
6108 		if (data != orig)
6109 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6110 	}
6111 }
6112 
6113 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6114 				 bool enable)
6115 {
6116 	u32 orig, data;
6117 
6118 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6119 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6120 		data |= 0x100;
6121 		if (orig != data)
6122 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6123 
6124 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6125 		data |= 0x100;
6126 		if (orig != data)
6127 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6128 	} else {
6129 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6130 		data &= ~0x100;
6131 		if (orig != data)
6132 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6133 
6134 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6135 		data &= ~0x100;
6136 		if (orig != data)
6137 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6138 	}
6139 }
6140 
6141 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6142 				bool enable)
6143 {
6144 	u32 orig, data;
6145 
6146 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6147 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
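		/* note: the value read above is discarded; the register is
		 * simply forced to 0xfff below */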
6148 		data = 0xfff;
6149 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6150 
6151 		orig = data = RREG32(UVD_CGC_CTRL);
6152 		data |= DCM;
6153 		if (orig != data)
6154 			WREG32(UVD_CGC_CTRL, data);
6155 	} else {
6156 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6157 		data &= ~0xfff;
6158 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6159 
6160 		orig = data = RREG32(UVD_CGC_CTRL);
6161 		data &= ~DCM;
6162 		if (orig != data)
6163 			WREG32(UVD_CGC_CTRL, data);
6164 	}
6165 }
6166 
6167 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6168 			       bool enable)
6169 {
6170 	u32 orig, data;
6171 
6172 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6173 
6174 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6175 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6176 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6177 	else
6178 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6179 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6180 
6181 	if (orig != data)
6182 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6183 }
6184 
6185 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6186 				bool enable)
6187 {
6188 	u32 orig, data;
6189 
6190 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6191 
6192 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6193 		data &= ~CLOCK_GATING_DIS;
6194 	else
6195 		data |= CLOCK_GATING_DIS;
6196 
6197 	if (orig != data)
6198 		WREG32(HDP_HOST_PATH_CNTL, data);
6199 }
6200 
6201 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6202 			      bool enable)
6203 {
6204 	u32 orig, data;
6205 
6206 	orig = data = RREG32(HDP_MEM_POWER_LS);
6207 
6208 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6209 		data |= HDP_LS_ENABLE;
6210 	else
6211 		data &= ~HDP_LS_ENABLE;
6212 
6213 	if (orig != data)
6214 		WREG32(HDP_MEM_POWER_LS, data);
6215 }
6216 
6217 void cik_update_cg(struct radeon_device *rdev,
6218 		   u32 block, bool enable)
6219 {
6220 
6221 	if (block & RADEON_CG_BLOCK_GFX) {
6222 		cik_enable_gui_idle_interrupt(rdev, false);
6223 		/* order matters: enable MGCG before CGCG, disable in the reverse order */
6224 		if (enable) {
6225 			cik_enable_mgcg(rdev, true);
6226 			cik_enable_cgcg(rdev, true);
6227 		} else {
6228 			cik_enable_cgcg(rdev, false);
6229 			cik_enable_mgcg(rdev, false);
6230 		}
6231 		cik_enable_gui_idle_interrupt(rdev, true);
6232 	}
6233 
6234 	if (block & RADEON_CG_BLOCK_MC) {
6235 		if (!(rdev->flags & RADEON_IS_IGP)) {
6236 			cik_enable_mc_mgcg(rdev, enable);
6237 			cik_enable_mc_ls(rdev, enable);
6238 		}
6239 	}
6240 
6241 	if (block & RADEON_CG_BLOCK_SDMA) {
6242 		cik_enable_sdma_mgcg(rdev, enable);
6243 		cik_enable_sdma_mgls(rdev, enable);
6244 	}
6245 
6246 	if (block & RADEON_CG_BLOCK_BIF) {
6247 		cik_enable_bif_mgls(rdev, enable);
6248 	}
6249 
6250 	if (block & RADEON_CG_BLOCK_UVD) {
6251 		if (rdev->has_uvd)
6252 			cik_enable_uvd_mgcg(rdev, enable);
6253 	}
6254 
6255 	if (block & RADEON_CG_BLOCK_HDP) {
6256 		cik_enable_hdp_mgcg(rdev, enable);
6257 		cik_enable_hdp_ls(rdev, enable);
6258 	}
6259 
6260 	if (block & RADEON_CG_BLOCK_VCE) {
6261 		vce_v2_0_enable_mgcg(rdev, enable);
6262 	}
6263 }
6264 
6265 static void cik_init_cg(struct radeon_device *rdev)
6266 {
6267 
6268 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6269 
6270 	if (rdev->has_uvd)
6271 		si_init_uvd_internal_cg(rdev);
6272 
6273 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6274 			     RADEON_CG_BLOCK_SDMA |
6275 			     RADEON_CG_BLOCK_BIF |
6276 			     RADEON_CG_BLOCK_UVD |
6277 			     RADEON_CG_BLOCK_HDP), true);
6278 }
6279 
6280 static void cik_fini_cg(struct radeon_device *rdev)
6281 {
6282 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6283 			     RADEON_CG_BLOCK_SDMA |
6284 			     RADEON_CG_BLOCK_BIF |
6285 			     RADEON_CG_BLOCK_UVD |
6286 			     RADEON_CG_BLOCK_HDP), false);
6287 
6288 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6289 }
6290 
6291 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6292 					  bool enable)
6293 {
6294 	u32 data, orig;
6295 
6296 	orig = data = RREG32(RLC_PG_CNTL);
6297 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6298 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6299 	else
6300 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6301 	if (orig != data)
6302 		WREG32(RLC_PG_CNTL, data);
6303 }
6304 
6305 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6306 					  bool enable)
6307 {
6308 	u32 data, orig;
6309 
6310 	orig = data = RREG32(RLC_PG_CNTL);
6311 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6312 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6313 	else
6314 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6315 	if (orig != data)
6316 		WREG32(RLC_PG_CNTL, data);
6317 }
6318 
6319 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6320 {
6321 	u32 data, orig;
6322 
6323 	orig = data = RREG32(RLC_PG_CNTL);
6324 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6325 		data &= ~DISABLE_CP_PG;
6326 	else
6327 		data |= DISABLE_CP_PG;
6328 	if (orig != data)
6329 		WREG32(RLC_PG_CNTL, data);
6330 }
6331 
6332 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6333 {
6334 	u32 data, orig;
6335 
6336 	orig = data = RREG32(RLC_PG_CNTL);
6337 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6338 		data &= ~DISABLE_GDS_PG;
6339 	else
6340 		data |= DISABLE_GDS_PG;
6341 	if (orig != data)
6342 		WREG32(RLC_PG_CNTL, data);
6343 }
6344 
6345 #define CP_ME_TABLE_SIZE    96
6346 #define CP_ME_TABLE_OFFSET  2048
6347 #define CP_MEC_TABLE_OFFSET 4096
6348 
6349 void cik_init_cp_pg_table(struct radeon_device *rdev)
6350 {
6351 	const __be32 *fw_data;
6352 	volatile u32 *dst_ptr;
6353 	int me, i, max_me = 4;
6354 	u32 bo_offset = 0;
6355 	u32 table_offset;
6356 
6357 	if (rdev->family == CHIP_KAVERI)
6358 		max_me = 5;
6359 
6360 	if (rdev->rlc.cp_table_ptr == NULL)
6361 		return;
6362 
6363 	/* write the cp table buffer */
6364 	dst_ptr = rdev->rlc.cp_table_ptr;
6365 	for (me = 0; me < max_me; me++) {
6366 		if (me == 0) {
6367 			fw_data = (const __be32 *)rdev->ce_fw->data;
6368 			table_offset = CP_ME_TABLE_OFFSET;
6369 		} else if (me == 1) {
6370 			fw_data = (const __be32 *)rdev->pfp_fw->data;
6371 			table_offset = CP_ME_TABLE_OFFSET;
6372 		} else if (me == 2) {
6373 			fw_data = (const __be32 *)rdev->me_fw->data;
6374 			table_offset = CP_ME_TABLE_OFFSET;
6375 		} else {
6376 			fw_data = (const __be32 *)rdev->mec_fw->data;
6377 			table_offset = CP_MEC_TABLE_OFFSET;
6378 		}
6379 
6380 		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
6381 			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6382 		}
6383 		bo_offset += CP_ME_TABLE_SIZE;
6384 	}
6385 }
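/* Resulting cp_table layout in dword offsets, derived from the loop above:
 * ME 0 (CE) at 0, ME 1 (PFP) at 96, ME 2 (ME) at 192, ME 3+ (MEC) at 288
 * and up; each entry is CP_ME_TABLE_SIZE (96) dwords from the ucode image.
 */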
6386 
6387 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6388 				bool enable)
6389 {
6390 	u32 data, orig;
6391 
6392 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6393 		orig = data = RREG32(RLC_PG_CNTL);
6394 		data |= GFX_PG_ENABLE;
6395 		if (orig != data)
6396 			WREG32(RLC_PG_CNTL, data);
6397 
6398 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6399 		data |= AUTO_PG_EN;
6400 		if (orig != data)
6401 			WREG32(RLC_AUTO_PG_CTRL, data);
6402 	} else {
6403 		orig = data = RREG32(RLC_PG_CNTL);
6404 		data &= ~GFX_PG_ENABLE;
6405 		if (orig != data)
6406 			WREG32(RLC_PG_CNTL, data);
6407 
6408 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6409 		data &= ~AUTO_PG_EN;
6410 		if (orig != data)
6411 			WREG32(RLC_AUTO_PG_CTRL, data);
6412 
6413 		data = RREG32(DB_RENDER_CONTROL);
6414 	}
6415 }
6416 
6417 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6418 {
6419 	u32 mask = 0, tmp, tmp1;
6420 	int i;
6421 
6422 	cik_select_se_sh(rdev, se, sh);
6423 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6424 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6425 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6426 
6427 	tmp &= 0xffff0000;
6428 
6429 	tmp |= tmp1;
6430 	tmp >>= 16;
6431 
6432 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6433 		mask <<= 1;
6434 		mask |= 1;
6435 	}
6436 
6437 	return (~tmp) & mask;
6438 }
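/* Worked example: with max_cu_per_sh = 8 the mask is 0xff; if the merged
 * config reports CUs 0 and 1 disabled (tmp = 0x03 after the shift), the
 * function returns ~0x03 & 0xff = 0xfc, i.e. CUs 2-7 are active.
 */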
6439 
6440 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6441 {
6442 	u32 i, j, k, active_cu_number = 0;
6443 	u32 mask, counter, cu_bitmap;
6444 	u32 tmp = 0;
6445 
6446 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6447 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6448 			mask = 1;
6449 			cu_bitmap = 0;
6450 			counter = 0;
6451 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6452 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6453 					if (counter < 2)
6454 						cu_bitmap |= mask;
6455 					counter++;
6456 				}
6457 				mask <<= 1;
6458 			}
6459 
6460 			active_cu_number += counter;
6461 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6462 		}
6463 	}
6464 
6465 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6466 
6467 	tmp = RREG32(RLC_MAX_PG_CU);
6468 	tmp &= ~MAX_PU_CU_MASK;
6469 	tmp |= MAX_PU_CU(active_cu_number);
6470 	WREG32(RLC_MAX_PG_CU, tmp);
6471 }
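/* Packing example for RLC_PG_AO_CU_MASK: SE0/SH0 occupies bits 0-7,
 * SE0/SH1 bits 8-15, SE1/SH0 bits 16-23, and so on per (i * 16 + j * 8).
 */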
6472 
6473 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6474 				       bool enable)
6475 {
6476 	u32 data, orig;
6477 
6478 	orig = data = RREG32(RLC_PG_CNTL);
6479 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6480 		data |= STATIC_PER_CU_PG_ENABLE;
6481 	else
6482 		data &= ~STATIC_PER_CU_PG_ENABLE;
6483 	if (orig != data)
6484 		WREG32(RLC_PG_CNTL, data);
6485 }
6486 
6487 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6488 					bool enable)
6489 {
6490 	u32 data, orig;
6491 
6492 	orig = data = RREG32(RLC_PG_CNTL);
6493 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6494 		data |= DYN_PER_CU_PG_ENABLE;
6495 	else
6496 		data &= ~DYN_PER_CU_PG_ENABLE;
6497 	if (orig != data)
6498 		WREG32(RLC_PG_CNTL, data);
6499 }
6500 
6501 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6502 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6503 
6504 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6505 {
6506 	u32 data, orig;
6507 	u32 i;
6508 
6509 	if (rdev->rlc.cs_data) {
6510 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6511 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6512 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6513 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6514 	} else {
6515 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6516 		for (i = 0; i < 3; i++)
6517 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6518 	}
6519 	if (rdev->rlc.reg_list) {
6520 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6521 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6522 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6523 	}
6524 
6525 	orig = data = RREG32(RLC_PG_CNTL);
6526 	data |= GFX_PG_SRC;
6527 	if (orig != data)
6528 		WREG32(RLC_PG_CNTL, data);
6529 
6530 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6531 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6532 
6533 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6534 	data &= ~IDLE_POLL_COUNT_MASK;
6535 	data |= IDLE_POLL_COUNT(0x60);
6536 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6537 
6538 	data = 0x10101010;
6539 	WREG32(RLC_PG_DELAY, data);
6540 
6541 	data = RREG32(RLC_PG_DELAY_2);
6542 	data &= ~0xff;
6543 	data |= 0x3;
6544 	WREG32(RLC_PG_DELAY_2, data);
6545 
6546 	data = RREG32(RLC_AUTO_PG_CTRL);
6547 	data &= ~GRBM_REG_SGIT_MASK;
6548 	data |= GRBM_REG_SGIT(0x700);
6549 	WREG32(RLC_AUTO_PG_CTRL, data);
6550 
6551 }
6552 
6553 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6554 {
6555 	cik_enable_gfx_cgpg(rdev, enable);
6556 	cik_enable_gfx_static_mgpg(rdev, enable);
6557 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6558 }
6559 
6560 u32 cik_get_csb_size(struct radeon_device *rdev)
6561 {
6562 	u32 count = 0;
6563 	const struct cs_section_def *sect = NULL;
6564 	const struct cs_extent_def *ext = NULL;
6565 
6566 	if (rdev->rlc.cs_data == NULL)
6567 		return 0;
6568 
6569 	/* begin clear state */
6570 	count += 2;
6571 	/* context control state */
6572 	count += 3;
6573 
6574 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6575 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6576 			if (sect->id == SECT_CONTEXT)
6577 				count += 2 + ext->reg_count;
6578 			else
6579 				return 0;
6580 		}
6581 	}
6582 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6583 	count += 4;
6584 	/* end clear state */
6585 	count += 2;
6586 	/* clear state */
6587 	count += 2;
6588 
6589 	return count;
6590 }
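/* Worked size example: a single SECT_CONTEXT extent with N registers gives
 * 2 (begin) + 3 (context control) + (2 + N) + 4 (raster config) +
 * 2 (end) + 2 (clear state) = 15 + N dwords, matching the packets
 * emitted by cik_get_csb_buffer() below.
 */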
6591 
6592 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6593 {
6594 	u32 count = 0, i;
6595 	const struct cs_section_def *sect = NULL;
6596 	const struct cs_extent_def *ext = NULL;
6597 
6598 	if (rdev->rlc.cs_data == NULL)
6599 		return;
6600 	if (buffer == NULL)
6601 		return;
6602 
6603 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6604 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6605 
6606 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6607 	buffer[count++] = cpu_to_le32(0x80000000);
6608 	buffer[count++] = cpu_to_le32(0x80000000);
6609 
6610 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6611 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6612 			if (sect->id == SECT_CONTEXT) {
6613 				buffer[count++] =
6614 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6615 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6616 				for (i = 0; i < ext->reg_count; i++)
6617 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6618 			} else {
6619 				return;
6620 			}
6621 		}
6622 	}
6623 
6624 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6625 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6626 	switch (rdev->family) {
6627 	case CHIP_BONAIRE:
6628 		buffer[count++] = cpu_to_le32(0x16000012);
6629 		buffer[count++] = cpu_to_le32(0x00000000);
6630 		break;
6631 	case CHIP_KAVERI:
6632 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6633 		buffer[count++] = cpu_to_le32(0x00000000);
6634 		break;
6635 	case CHIP_KABINI:
6636 	case CHIP_MULLINS:
6637 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6638 		buffer[count++] = cpu_to_le32(0x00000000);
6639 		break;
6640 	case CHIP_HAWAII:
6641 		buffer[count++] = cpu_to_le32(0x3a00161a);
6642 		buffer[count++] = cpu_to_le32(0x0000002e);
6643 		break;
6644 	default:
6645 		buffer[count++] = cpu_to_le32(0x00000000);
6646 		buffer[count++] = cpu_to_le32(0x00000000);
6647 		break;
6648 	}
6649 
6650 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6651 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6652 
6653 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6654 	buffer[count++] = cpu_to_le32(0);
6655 }
6656 
6657 static void cik_init_pg(struct radeon_device *rdev)
6658 {
6659 	if (rdev->pg_flags) {
6660 		cik_enable_sck_slowdown_on_pu(rdev, true);
6661 		cik_enable_sck_slowdown_on_pd(rdev, true);
6662 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6663 			cik_init_gfx_cgpg(rdev);
6664 			cik_enable_cp_pg(rdev, true);
6665 			cik_enable_gds_pg(rdev, true);
6666 		}
6667 		cik_init_ao_cu_mask(rdev);
6668 		cik_update_gfx_pg(rdev, true);
6669 	}
6670 }
6671 
6672 static void cik_fini_pg(struct radeon_device *rdev)
6673 {
6674 	if (rdev->pg_flags) {
6675 		cik_update_gfx_pg(rdev, false);
6676 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6677 			cik_enable_cp_pg(rdev, false);
6678 			cik_enable_gds_pg(rdev, false);
6679 		}
6680 	}
6681 }
6682 
6683 /*
6684  * Interrupts
6685  * Starting with r6xx, interrupts are handled via a ring buffer.
6686  * Ring buffers are areas of GPU-accessible memory that the GPU
6687  * writes interrupt vectors into and the host reads vectors out of.
6688  * There is a rptr (read pointer) that determines where the
6689  * host is currently reading, and a wptr (write pointer)
6690  * which determines where the GPU has written.  When the
6691  * pointers are equal, the ring is idle.  When the GPU
6692  * writes vectors to the ring buffer, it increments the
6693  * wptr.  When there is an interrupt, the host then starts
6694  * fetching vectors and processing them until the pointers are
6695  * equal again at which point it updates the rptr.
6696  */
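/* Minimal consumer sketch of the scheme described above (illustrative
 * only; the real logic lives in cik_irq_process() further down, which
 * also handles wptr overflow):
 *
 *	u32 wptr = cik_get_ih_wptr(rdev);
 *	u32 rptr = rdev->ih.rptr;
 *
 *	while (rptr != wptr) {
 *		u32 src_id = le32_to_cpu(rdev->ih.ring[rptr / 4]) & 0xff;
 *		... dispatch on src_id ...
 *		rptr += 16;		(IH vectors are 16 bytes on CIK)
 *		rptr &= rdev->ih.ptr_mask;
 *	}
 *	rdev->ih.rptr = rptr;
 *	WREG32(IH_RB_RPTR, rptr);
 */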
6697 
6698 /**
6699  * cik_enable_interrupts - Enable the interrupt ring buffer
6700  *
6701  * @rdev: radeon_device pointer
6702  *
6703  * Enable the interrupt ring buffer (CIK).
6704  */
6705 static void cik_enable_interrupts(struct radeon_device *rdev)
6706 {
6707 	u32 ih_cntl = RREG32(IH_CNTL);
6708 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6709 
6710 	ih_cntl |= ENABLE_INTR;
6711 	ih_rb_cntl |= IH_RB_ENABLE;
6712 	WREG32(IH_CNTL, ih_cntl);
6713 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6714 	rdev->ih.enabled = true;
6715 }
6716 
6717 /**
6718  * cik_disable_interrupts - Disable the interrupt ring buffer
6719  *
6720  * @rdev: radeon_device pointer
6721  *
6722  * Disable the interrupt ring buffer (CIK).
6723  */
6724 static void cik_disable_interrupts(struct radeon_device *rdev)
6725 {
6726 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6727 	u32 ih_cntl = RREG32(IH_CNTL);
6728 
6729 	ih_rb_cntl &= ~IH_RB_ENABLE;
6730 	ih_cntl &= ~ENABLE_INTR;
6731 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6732 	WREG32(IH_CNTL, ih_cntl);
6733 	/* set rptr, wptr to 0 */
6734 	WREG32(IH_RB_RPTR, 0);
6735 	WREG32(IH_RB_WPTR, 0);
6736 	rdev->ih.enabled = false;
6737 	rdev->ih.rptr = 0;
6738 }
6739 
6740 /**
6741  * cik_disable_interrupt_state - Disable all interrupt sources
6742  *
6743  * @rdev: radeon_device pointer
6744  *
6745  * Clear all interrupt enable bits used by the driver (CIK).
6746  */
6747 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6748 {
6749 	u32 tmp;
6750 
6751 	/* gfx ring */
6752 	tmp = RREG32(CP_INT_CNTL_RING0) &
6753 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6754 	WREG32(CP_INT_CNTL_RING0, tmp);
6755 	/* sdma */
6756 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6757 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6758 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6759 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6760 	/* compute queues */
6761 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6762 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6763 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6764 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6765 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6766 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6767 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6768 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6769 	/* grbm */
6770 	WREG32(GRBM_INT_CNTL, 0);
6771 	/* vline/vblank, etc. */
6772 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6773 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6774 	if (rdev->num_crtc >= 4) {
6775 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6776 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6777 	}
6778 	if (rdev->num_crtc >= 6) {
6779 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6780 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6781 	}
6782 	/* pflip */
6783 	if (rdev->num_crtc >= 2) {
6784 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6785 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6786 	}
6787 	if (rdev->num_crtc >= 4) {
6788 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6789 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6790 	}
6791 	if (rdev->num_crtc >= 6) {
6792 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6793 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6794 	}
6795 
6796 	/* dac hotplug */
6797 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6798 
6799 	/* digital hotplug */
6800 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6801 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6802 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6803 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6804 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6805 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6806 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6807 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6808 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6809 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6810 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6811 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6812 
6813 }
6814 
6815 /**
6816  * cik_irq_init - init and enable the interrupt ring
6817  *
6818  * @rdev: radeon_device pointer
6819  *
6820  * Allocate a ring buffer for the interrupt controller,
6821  * enable the RLC, disable interrupts, then set up
6822  * and enable the IH ring buffer (CIK).
6823  * Called at device load and resume.
6824  * Returns 0 for success, errors for failure.
6825  */
6826 static int cik_irq_init(struct radeon_device *rdev)
6827 {
6828 	int ret = 0;
6829 	int rb_bufsz;
6830 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6831 
6832 	/* allocate ring */
6833 	ret = r600_ih_ring_alloc(rdev);
6834 	if (ret)
6835 		return ret;
6836 
6837 	/* disable irqs */
6838 	cik_disable_interrupts(rdev);
6839 
6840 	/* init rlc */
6841 	ret = cik_rlc_resume(rdev);
6842 	if (ret) {
6843 		r600_ih_ring_fini(rdev);
6844 		return ret;
6845 	}
6846 
6847 	/* setup interrupt control */
6848 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6849 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6850 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6851 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6852 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6853 	 */
6854 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6855 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6856 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6857 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6858 
6859 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6860 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
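	/* e.g. with the default 64KB IH ring allocated in cik_init(),
	 * this is order_base_2(16384 dwords) = 14, which lands in the
	 * ring size field of IH_RB_CNTL below
	 */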
6861 
6862 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6863 		      IH_WPTR_OVERFLOW_CLEAR |
6864 		      (rb_bufsz << 1));
6865 
6866 	if (rdev->wb.enabled)
6867 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6868 
6869 	/* set the writeback address whether it's enabled or not */
6870 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6871 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6872 
6873 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6874 
6875 	/* set rptr, wptr to 0 */
6876 	WREG32(IH_RB_RPTR, 0);
6877 	WREG32(IH_RB_WPTR, 0);
6878 
6879 	/* Default settings for IH_CNTL (disabled at first) */
6880 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6881 	/* RPTR_REARM only works if msi's are enabled */
6882 	if (rdev->msi_enabled)
6883 		ih_cntl |= RPTR_REARM;
6884 	WREG32(IH_CNTL, ih_cntl);
6885 
6886 	/* force the active interrupt state to all disabled */
6887 	cik_disable_interrupt_state(rdev);
6888 
6889 	pci_set_master(rdev->pdev);
6890 
6891 	/* enable irqs */
6892 	cik_enable_interrupts(rdev);
6893 
6894 	return ret;
6895 }
6896 
6897 /**
6898  * cik_irq_set - enable/disable interrupt sources
6899  *
6900  * @rdev: radeon_device pointer
6901  *
6902  * Enable interrupt sources on the GPU (vblanks, hpd,
6903  * etc.) (CIK).
6904  * Returns 0 for success, errors for failure.
6905  */
6906 int cik_irq_set(struct radeon_device *rdev)
6907 {
6908 	u32 cp_int_cntl;
6909 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6910 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6911 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6912 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6913 	u32 grbm_int_cntl = 0;
6914 	u32 dma_cntl, dma_cntl1;
6915 	u32 thermal_int;
6916 
6917 	if (!rdev->irq.installed) {
6918 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6919 		return -EINVAL;
6920 	}
6921 	/* don't enable anything if the ih is disabled */
6922 	if (!rdev->ih.enabled) {
6923 		cik_disable_interrupts(rdev);
6924 		/* force the active interrupt state to all disabled */
6925 		cik_disable_interrupt_state(rdev);
6926 		return 0;
6927 	}
6928 
6929 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6930 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6931 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6932 
6933 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6934 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6935 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6936 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6937 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6938 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6939 
6940 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6941 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6942 
6943 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6944 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6945 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6946 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6947 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6948 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6949 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6950 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6951 
6952 	if (rdev->flags & RADEON_IS_IGP)
6953 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6954 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6955 	else
6956 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6957 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6958 
6959 	/* enable CP interrupts on all rings */
6960 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6961 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6962 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6963 	}
6964 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6965 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6966 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
6967 		if (ring->me == 1) {
6968 			switch (ring->pipe) {
6969 			case 0:
6970 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6971 				break;
6972 			case 1:
6973 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6974 				break;
6975 			case 2:
6976 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6977 				break;
6978 			case 3:
6979 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6980 				break;
6981 			default:
6982 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6983 				break;
6984 			}
6985 		} else if (ring->me == 2) {
6986 			switch (ring->pipe) {
6987 			case 0:
6988 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6989 				break;
6990 			case 1:
6991 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6992 				break;
6993 			case 2:
6994 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6995 				break;
6996 			case 3:
6997 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6998 				break;
6999 			default:
7000 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7001 				break;
7002 			}
7003 		} else {
7004 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7005 		}
7006 	}
7007 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7008 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7009 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7010 		if (ring->me == 1) {
7011 			switch (ring->pipe) {
7012 			case 0:
7013 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7014 				break;
7015 			case 1:
7016 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7017 				break;
7018 			case 2:
7019 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7020 				break;
7021 			case 3:
7022 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7023 				break;
7024 			default:
7025 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7026 				break;
7027 			}
7028 		} else if (ring->me == 2) {
7029 			switch (ring->pipe) {
7030 			case 0:
7031 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7032 				break;
7033 			case 1:
7034 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7035 				break;
7036 			case 2:
7037 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7038 				break;
7039 			case 3:
7040 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7041 				break;
7042 			default:
7043 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7044 				break;
7045 			}
7046 		} else {
7047 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7048 		}
7049 	}
7050 
7051 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7052 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7053 		dma_cntl |= TRAP_ENABLE;
7054 	}
7055 
7056 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7057 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7058 		dma_cntl1 |= TRAP_ENABLE;
7059 	}
7060 
7061 	if (rdev->irq.crtc_vblank_int[0] ||
7062 	    atomic_read(&rdev->irq.pflip[0])) {
7063 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7064 		crtc1 |= VBLANK_INTERRUPT_MASK;
7065 	}
7066 	if (rdev->irq.crtc_vblank_int[1] ||
7067 	    atomic_read(&rdev->irq.pflip[1])) {
7068 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7069 		crtc2 |= VBLANK_INTERRUPT_MASK;
7070 	}
7071 	if (rdev->irq.crtc_vblank_int[2] ||
7072 	    atomic_read(&rdev->irq.pflip[2])) {
7073 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7074 		crtc3 |= VBLANK_INTERRUPT_MASK;
7075 	}
7076 	if (rdev->irq.crtc_vblank_int[3] ||
7077 	    atomic_read(&rdev->irq.pflip[3])) {
7078 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7079 		crtc4 |= VBLANK_INTERRUPT_MASK;
7080 	}
7081 	if (rdev->irq.crtc_vblank_int[4] ||
7082 	    atomic_read(&rdev->irq.pflip[4])) {
7083 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7084 		crtc5 |= VBLANK_INTERRUPT_MASK;
7085 	}
7086 	if (rdev->irq.crtc_vblank_int[5] ||
7087 	    atomic_read(&rdev->irq.pflip[5])) {
7088 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7089 		crtc6 |= VBLANK_INTERRUPT_MASK;
7090 	}
7091 	if (rdev->irq.hpd[0]) {
7092 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7093 		hpd1 |= DC_HPDx_INT_EN;
7094 	}
7095 	if (rdev->irq.hpd[1]) {
7096 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7097 		hpd2 |= DC_HPDx_INT_EN;
7098 	}
7099 	if (rdev->irq.hpd[2]) {
7100 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7101 		hpd3 |= DC_HPDx_INT_EN;
7102 	}
7103 	if (rdev->irq.hpd[3]) {
7104 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7105 		hpd4 |= DC_HPDx_INT_EN;
7106 	}
7107 	if (rdev->irq.hpd[4]) {
7108 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7109 		hpd5 |= DC_HPDx_INT_EN;
7110 	}
7111 	if (rdev->irq.hpd[5]) {
7112 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7113 		hpd6 |= DC_HPDx_INT_EN;
7114 	}
7115 
7116 	if (rdev->irq.dpm_thermal) {
7117 		DRM_DEBUG("dpm thermal\n");
7118 		if (rdev->flags & RADEON_IS_IGP)
7119 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7120 		else
7121 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7122 	}
7123 
7124 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7125 
7126 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7127 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7128 
7129 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7130 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7131 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7132 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7133 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7134 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7135 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7136 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7137 
7138 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7139 
7140 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7141 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7142 	if (rdev->num_crtc >= 4) {
7143 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7144 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7145 	}
7146 	if (rdev->num_crtc >= 6) {
7147 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7148 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7149 	}
7150 
7151 	if (rdev->num_crtc >= 2) {
7152 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7153 		       GRPH_PFLIP_INT_MASK);
7154 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7155 		       GRPH_PFLIP_INT_MASK);
7156 	}
7157 	if (rdev->num_crtc >= 4) {
7158 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7159 		       GRPH_PFLIP_INT_MASK);
7160 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7161 		       GRPH_PFLIP_INT_MASK);
7162 	}
7163 	if (rdev->num_crtc >= 6) {
7164 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7165 		       GRPH_PFLIP_INT_MASK);
7166 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7167 		       GRPH_PFLIP_INT_MASK);
7168 	}
7169 
7170 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7171 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7172 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7173 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7174 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7175 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7176 
7177 	if (rdev->flags & RADEON_IS_IGP)
7178 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7179 	else
7180 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7181 
7182 	return 0;
7183 }
7184 
7185 /**
7186  * cik_irq_ack - ack interrupt sources
7187  *
7188  * @rdev: radeon_device pointer
7189  *
7190  * Ack interrupt sources on the GPU (vblanks, hpd,
7191  * etc.) (CIK).  Certain interrupt sources are sw
7192  * generated and do not require an explicit ack.
7193  */
7194 static inline void cik_irq_ack(struct radeon_device *rdev)
7195 {
7196 	u32 tmp;
7197 
7198 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7199 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7200 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7201 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7202 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7203 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7204 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7205 
7206 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7207 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7208 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7209 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7210 	if (rdev->num_crtc >= 4) {
7211 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7212 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7213 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7214 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7215 	}
7216 	if (rdev->num_crtc >= 6) {
7217 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7218 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7219 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7220 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7221 	}
7222 
7223 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7224 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7225 		       GRPH_PFLIP_INT_CLEAR);
7226 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7227 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7228 		       GRPH_PFLIP_INT_CLEAR);
7229 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7230 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7231 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7232 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7233 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7234 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7235 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7236 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7237 
7238 	if (rdev->num_crtc >= 4) {
7239 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7240 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7241 			       GRPH_PFLIP_INT_CLEAR);
7242 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7243 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7244 			       GRPH_PFLIP_INT_CLEAR);
7245 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7246 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7247 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7248 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7249 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7250 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7251 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7252 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7253 	}
7254 
7255 	if (rdev->num_crtc >= 6) {
7256 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7257 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7258 			       GRPH_PFLIP_INT_CLEAR);
7259 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7260 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7261 			       GRPH_PFLIP_INT_CLEAR);
7262 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7263 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7264 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7265 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7266 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7267 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7268 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7269 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7270 	}
7271 
7272 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7273 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7274 		tmp |= DC_HPDx_INT_ACK;
7275 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7276 	}
7277 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7278 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7279 		tmp |= DC_HPDx_INT_ACK;
7280 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7281 	}
7282 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7283 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7284 		tmp |= DC_HPDx_INT_ACK;
7285 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7286 	}
7287 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7288 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7289 		tmp |= DC_HPDx_INT_ACK;
7290 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7291 	}
7292 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7293 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7294 		tmp |= DC_HPDx_INT_ACK;
7295 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7296 	}
7297 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7298 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7299 		tmp |= DC_HPDx_INT_ACK;
7300 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7301 	}
7302 }
7303 
7304 /**
7305  * cik_irq_disable - disable interrupts
7306  *
7307  * @rdev: radeon_device pointer
7308  *
7309  * Disable interrupts on the hw (CIK).
7310  */
7311 static void cik_irq_disable(struct radeon_device *rdev)
7312 {
7313 	cik_disable_interrupts(rdev);
7314 	/* Wait and acknowledge irq */
7315 	mdelay(1);
7316 	cik_irq_ack(rdev);
7317 	cik_disable_interrupt_state(rdev);
7318 }
7319 
7320 /**
7321  * cik_irq_suspend - disable interrupts for suspend
7322  *
7323  * @rdev: radeon_device pointer
7324  *
7325  * Disable interrupts and stop the RLC (CIK).
7326  * Used for suspend.
7327  */
7328 static void cik_irq_suspend(struct radeon_device *rdev)
7329 {
7330 	cik_irq_disable(rdev);
7331 	cik_rlc_stop(rdev);
7332 }
7333 
7334 /**
7335  * cik_irq_fini - tear down interrupt support
7336  *
7337  * @rdev: radeon_device pointer
7338  *
7339  * Disable interrupts on the hw and free the IH ring
7340  * buffer (CIK).
7341  * Used for driver unload.
7342  */
7343 static void cik_irq_fini(struct radeon_device *rdev)
7344 {
7345 	cik_irq_suspend(rdev);
7346 	r600_ih_ring_fini(rdev);
7347 }
7348 
7349 /**
7350  * cik_get_ih_wptr - get the IH ring buffer wptr
7351  *
7352  * @rdev: radeon_device pointer
7353  *
7354  * Get the IH ring buffer wptr from either the register
7355  * or the writeback memory buffer (CIK).  Also check for
7356  * ring buffer overflow and deal with it.
7357  * Used by cik_irq_process().
7358  * Returns the value of the wptr.
7359  */
7360 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7361 {
7362 	u32 wptr, tmp;
7363 
7364 	if (rdev->wb.enabled)
7365 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7366 	else
7367 		wptr = RREG32(IH_RB_WPTR);
7368 
7369 	if (wptr & RB_OVERFLOW) {
7370 		/* When a ring buffer overflow happens, start parsing interrupts
7371 		 * from the last non-overwritten vector (wptr + 16), which
7372 		 * should allow us to catch up.
7373 		 */
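		/* e.g. with the default 64KB IH ring, ptr_mask is 0xffff and
		 * each IV entry is 16 bytes, hence the 16-byte skip
		 */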
7374 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7375 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7376 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7377 		tmp = RREG32(IH_RB_CNTL);
7378 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7379 		WREG32(IH_RB_CNTL, tmp);
7380 		wptr &= ~RB_OVERFLOW;
7381 	}
7382 	return (wptr & rdev->ih.ptr_mask);
7383 }
7384 
7385 /*        CIK IV Ring
7386  * Each IV ring entry is 128 bits:
7387  * [7:0]    - interrupt source id
7388  * [31:8]   - reserved
7389  * [59:32]  - interrupt source data
7390  * [63:60]  - reserved
7391  * [71:64]  - RINGID
7392  *            CP:
7393  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7394  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7395  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7396  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7397  *            PIPE_ID - ME0 0=3D
7398  *                    - ME1&2 compute dispatcher (4 pipes each)
7399  *            SDMA:
7400  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7401  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7402  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7403  * [79:72]  - VMID
7404  * [95:80]  - PASID
7405  * [127:96] - reserved
7406  */
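/*
 * Illustrative sketch only (not wired into the interrupt path): how a
 * single IV ring entry would be decoded per the layout above.  Only the
 * first three dwords carry fields the driver consumes below; the helper
 * name is local to this example.
 */
static inline void cik_iv_entry_decode(const volatile u32 *ring, u32 rptr,
				       u32 *src_id, u32 *src_data,
				       u32 *ring_id)
{
	u32 ring_index = rptr / 4;	/* rptr is in bytes, ring[] in dwords */

	*src_id = le32_to_cpu(ring[ring_index]) & 0xff;		    /* [7:0] */
	*src_data = le32_to_cpu(ring[ring_index + 1]) & 0xfffffff; /* [59:32] */
	*ring_id = le32_to_cpu(ring[ring_index + 2]) & 0xff;	    /* [71:64] */
}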
7407 /**
7408  * cik_irq_process - interrupt handler
7409  *
7410  * @rdev: radeon_device pointer
7411  *
7412  * Interrupt handler (CIK).  Walk the IH ring,
7413  * ack interrupts and schedule work to handle
7414  * interrupt events.
7415  * Returns irq process return code.
7416  */
7417 int cik_irq_process(struct radeon_device *rdev)
7418 {
7419 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7420 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7421 	u32 wptr;
7422 	u32 rptr;
7423 	u32 src_id, src_data, ring_id;
7424 	u8 me_id, pipe_id, queue_id;
7425 	u32 ring_index;
7426 	bool queue_hotplug = false;
7427 	bool queue_reset = false;
7428 	u32 addr, status, mc_client;
7429 	bool queue_thermal = false;
7430 
7431 	if (!rdev->ih.enabled || rdev->shutdown)
7432 		return IRQ_NONE;
7433 
7434 	wptr = cik_get_ih_wptr(rdev);
7435 
7436 restart_ih:
7437 	/* is somebody else already processing irqs? */
7438 	if (atomic_xchg(&rdev->ih.lock, 1))
7439 		return IRQ_NONE;
7440 
7441 	rptr = rdev->ih.rptr;
7442 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7443 
7444 	/* Order reading of wptr vs. reading of IH ring data */
7445 	rmb();
7446 
7447 	/* display interrupts */
7448 	cik_irq_ack(rdev);
7449 
7450 	while (rptr != wptr) {
7451 		/* wptr/rptr are in bytes! */
7452 		ring_index = rptr / 4;
7453 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7454 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7455 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7456 
7457 		switch (src_id) {
7458 		case 1: /* D1 vblank/vline */
7459 			switch (src_data) {
7460 			case 0: /* D1 vblank */
7461 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7462 					if (rdev->irq.crtc_vblank_int[0]) {
7463 						drm_handle_vblank(rdev->ddev, 0);
7464 						rdev->pm.vblank_sync = true;
7465 						wake_up(&rdev->irq.vblank_queue);
7466 					}
7467 					if (atomic_read(&rdev->irq.pflip[0]))
7468 						radeon_crtc_handle_vblank(rdev, 0);
7469 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7470 					DRM_DEBUG("IH: D1 vblank\n");
7471 				}
7472 				break;
7473 			case 1: /* D1 vline */
7474 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7475 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7476 					DRM_DEBUG("IH: D1 vline\n");
7477 				}
7478 				break;
7479 			default:
7480 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7481 				break;
7482 			}
7483 			break;
7484 		case 2: /* D2 vblank/vline */
7485 			switch (src_data) {
7486 			case 0: /* D2 vblank */
7487 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7488 					if (rdev->irq.crtc_vblank_int[1]) {
7489 						drm_handle_vblank(rdev->ddev, 1);
7490 						rdev->pm.vblank_sync = true;
7491 						wake_up(&rdev->irq.vblank_queue);
7492 					}
7493 					if (atomic_read(&rdev->irq.pflip[1]))
7494 						radeon_crtc_handle_vblank(rdev, 1);
7495 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7496 					DRM_DEBUG("IH: D2 vblank\n");
7497 				}
7498 				break;
7499 			case 1: /* D2 vline */
7500 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7501 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7502 					DRM_DEBUG("IH: D2 vline\n");
7503 				}
7504 				break;
7505 			default:
7506 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7507 				break;
7508 			}
7509 			break;
7510 		case 3: /* D3 vblank/vline */
7511 			switch (src_data) {
7512 			case 0: /* D3 vblank */
7513 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7514 					if (rdev->irq.crtc_vblank_int[2]) {
7515 						drm_handle_vblank(rdev->ddev, 2);
7516 						rdev->pm.vblank_sync = true;
7517 						wake_up(&rdev->irq.vblank_queue);
7518 					}
7519 					if (atomic_read(&rdev->irq.pflip[2]))
7520 						radeon_crtc_handle_vblank(rdev, 2);
7521 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7522 					DRM_DEBUG("IH: D3 vblank\n");
7523 				}
7524 				break;
7525 			case 1: /* D3 vline */
7526 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7527 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7528 					DRM_DEBUG("IH: D3 vline\n");
7529 				}
7530 				break;
7531 			default:
7532 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7533 				break;
7534 			}
7535 			break;
7536 		case 4: /* D4 vblank/vline */
7537 			switch (src_data) {
7538 			case 0: /* D4 vblank */
7539 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7540 					if (rdev->irq.crtc_vblank_int[3]) {
7541 						drm_handle_vblank(rdev->ddev, 3);
7542 						rdev->pm.vblank_sync = true;
7543 						wake_up(&rdev->irq.vblank_queue);
7544 					}
7545 					if (atomic_read(&rdev->irq.pflip[3]))
7546 						radeon_crtc_handle_vblank(rdev, 3);
7547 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7548 					DRM_DEBUG("IH: D4 vblank\n");
7549 				}
7550 				break;
7551 			case 1: /* D4 vline */
7552 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7553 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7554 					DRM_DEBUG("IH: D4 vline\n");
7555 				}
7556 				break;
7557 			default:
7558 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7559 				break;
7560 			}
7561 			break;
7562 		case 5: /* D5 vblank/vline */
7563 			switch (src_data) {
7564 			case 0: /* D5 vblank */
7565 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7566 					if (rdev->irq.crtc_vblank_int[4]) {
7567 						drm_handle_vblank(rdev->ddev, 4);
7568 						rdev->pm.vblank_sync = true;
7569 						wake_up(&rdev->irq.vblank_queue);
7570 					}
7571 					if (atomic_read(&rdev->irq.pflip[4]))
7572 						radeon_crtc_handle_vblank(rdev, 4);
7573 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7574 					DRM_DEBUG("IH: D5 vblank\n");
7575 				}
7576 				break;
7577 			case 1: /* D5 vline */
7578 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7579 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7580 					DRM_DEBUG("IH: D5 vline\n");
7581 				}
7582 				break;
7583 			default:
7584 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7585 				break;
7586 			}
7587 			break;
7588 		case 6: /* D6 vblank/vline */
7589 			switch (src_data) {
7590 			case 0: /* D6 vblank */
7591 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7592 					if (rdev->irq.crtc_vblank_int[5]) {
7593 						drm_handle_vblank(rdev->ddev, 5);
7594 						rdev->pm.vblank_sync = true;
7595 						wake_up(&rdev->irq.vblank_queue);
7596 					}
7597 					if (atomic_read(&rdev->irq.pflip[5]))
7598 						radeon_crtc_handle_vblank(rdev, 5);
7599 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7600 					DRM_DEBUG("IH: D6 vblank\n");
7601 				}
7602 				break;
7603 			case 1: /* D6 vline */
7604 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7605 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7606 					DRM_DEBUG("IH: D6 vline\n");
7607 				}
7608 				break;
7609 			default:
7610 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7611 				break;
7612 			}
7613 			break;
7614 		case 8: /* D1 page flip */
7615 		case 10: /* D2 page flip */
7616 		case 12: /* D3 page flip */
7617 		case 14: /* D4 page flip */
7618 		case 16: /* D5 page flip */
7619 		case 18: /* D6 page flip */
7620 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7621 			radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7622 			break;
7623 		case 42: /* HPD hotplug */
7624 			switch (src_data) {
7625 			case 0:
7626 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7627 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7628 					queue_hotplug = true;
7629 					DRM_DEBUG("IH: HPD1\n");
7630 				}
7631 				break;
7632 			case 1:
7633 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7634 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7635 					queue_hotplug = true;
7636 					DRM_DEBUG("IH: HPD2\n");
7637 				}
7638 				break;
7639 			case 2:
7640 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7641 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7642 					queue_hotplug = true;
7643 					DRM_DEBUG("IH: HPD3\n");
7644 				}
7645 				break;
7646 			case 3:
7647 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7648 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7649 					queue_hotplug = true;
7650 					DRM_DEBUG("IH: HPD4\n");
7651 				}
7652 				break;
7653 			case 4:
7654 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7655 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7656 					queue_hotplug = true;
7657 					DRM_DEBUG("IH: HPD5\n");
7658 				}
7659 				break;
7660 			case 5:
7661 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7662 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7663 					queue_hotplug = true;
7664 					DRM_DEBUG("IH: HPD6\n");
7665 				}
7666 				break;
7667 			default:
7668 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7669 				break;
7670 			}
7671 			break;
7672 		case 124: /* UVD */
7673 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7674 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7675 			break;
7676 		case 146:
7677 		case 147:
7678 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7679 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7680 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7681 			/* reset addr and status */
7682 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7683 			if (addr == 0x0 && status == 0x0)
7684 				break;
7685 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7686 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7687 				addr);
7688 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7689 				status);
7690 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7691 			break;
7692 		case 167: /* VCE */
7693 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7694 			switch (src_data) {
7695 			case 0:
7696 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7697 				break;
7698 			case 1:
7699 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7700 				break;
7701 			default:
7702 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7703 				break;
7704 			}
7705 			break;
7706 		case 176: /* GFX RB CP_INT */
7707 		case 177: /* GFX IB CP_INT */
7708 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7709 			break;
7710 		case 181: /* CP EOP event */
7711 			DRM_DEBUG("IH: CP EOP\n");
7712 			/* XXX check the bitfield order! */
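			/* RINGID byte per the IV layout above:
			 * [6:5] = ME_ID, [4:3] = PIPE_ID, [2:0] = QUEUE_ID,
			 * which matches the masks used here
			 */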
7713 			me_id = (ring_id & 0x60) >> 5;
7714 			pipe_id = (ring_id & 0x18) >> 3;
7715 			queue_id = (ring_id & 0x7) >> 0;
7716 			switch (me_id) {
7717 			case 0:
7718 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7719 				break;
7720 			case 1:
7721 			case 2:
7722 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7723 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7724 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7725 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7726 				break;
7727 			}
7728 			break;
7729 		case 184: /* CP Privileged reg access */
7730 			DRM_ERROR("Illegal register access in command stream\n");
7731 			/* XXX check the bitfield order! */
7732 			me_id = (ring_id & 0x60) >> 5;
7733 			pipe_id = (ring_id & 0x18) >> 3;
7734 			queue_id = (ring_id & 0x7) >> 0;
7735 			switch (me_id) {
7736 			case 0:
7737 				/* This results in a full GPU reset, but all we need to do is soft
7738 				 * reset the CP for gfx
7739 				 */
7740 				queue_reset = true;
7741 				break;
7742 			case 1:
7743 				/* XXX compute */
7744 				queue_reset = true;
7745 				break;
7746 			case 2:
7747 				/* XXX compute */
7748 				queue_reset = true;
7749 				break;
7750 			}
7751 			break;
7752 		case 185: /* CP Privileged inst */
7753 			DRM_ERROR("Illegal instruction in command stream\n");
7754 			/* XXX check the bitfield order! */
7755 			me_id = (ring_id & 0x60) >> 5;
7756 			pipe_id = (ring_id & 0x18) >> 3;
7757 			queue_id = (ring_id & 0x7) >> 0;
7758 			switch (me_id) {
7759 			case 0:
7760 				/* This results in a full GPU reset, but all we need to do is soft
7761 				 * reset the CP for gfx
7762 				 */
7763 				queue_reset = true;
7764 				break;
7765 			case 1:
7766 				/* XXX compute */
7767 				queue_reset = true;
7768 				break;
7769 			case 2:
7770 				/* XXX compute */
7771 				queue_reset = true;
7772 				break;
7773 			}
7774 			break;
7775 		case 224: /* SDMA trap event */
7776 			/* XXX check the bitfield order! */
7777 			me_id = (ring_id & 0x3) >> 0;
7778 			queue_id = (ring_id & 0xc) >> 2;
7779 			DRM_DEBUG("IH: SDMA trap\n");
7780 			switch (me_id) {
7781 			case 0:
7782 				switch (queue_id) {
7783 				case 0:
7784 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7785 					break;
7786 				case 1:
7787 					/* XXX compute */
7788 					break;
7789 				case 2:
7790 					/* XXX compute */
7791 					break;
7792 				}
7793 				break;
7794 			case 1:
7795 				switch (queue_id) {
7796 				case 0:
7797 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7798 					break;
7799 				case 1:
7800 					/* XXX compute */
7801 					break;
7802 				case 2:
7803 					/* XXX compute */
7804 					break;
7805 				}
7806 				break;
7807 			}
7808 			break;
7809 		case 230: /* thermal low to high */
7810 			DRM_DEBUG("IH: thermal low to high\n");
7811 			rdev->pm.dpm.thermal.high_to_low = false;
7812 			queue_thermal = true;
7813 			break;
7814 		case 231: /* thermal high to low */
7815 			DRM_DEBUG("IH: thermal high to low\n");
7816 			rdev->pm.dpm.thermal.high_to_low = true;
7817 			queue_thermal = true;
7818 			break;
7819 		case 233: /* GUI IDLE */
7820 			DRM_DEBUG("IH: GUI idle\n");
7821 			break;
7822 		case 241: /* SDMA Privileged inst */
7823 		case 247: /* SDMA Privileged inst */
7824 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7825 			/* XXX check the bitfield order! */
7826 			me_id = (ring_id & 0x3) >> 0;
7827 			queue_id = (ring_id & 0xc) >> 2;
7828 			switch (me_id) {
7829 			case 0:
7830 				switch (queue_id) {
7831 				case 0:
7832 					queue_reset = true;
7833 					break;
7834 				case 1:
7835 					/* XXX compute */
7836 					queue_reset = true;
7837 					break;
7838 				case 2:
7839 					/* XXX compute */
7840 					queue_reset = true;
7841 					break;
7842 				}
7843 				break;
7844 			case 1:
7845 				switch (queue_id) {
7846 				case 0:
7847 					queue_reset = true;
7848 					break;
7849 				case 1:
7850 					/* XXX compute */
7851 					queue_reset = true;
7852 					break;
7853 				case 2:
7854 					/* XXX compute */
7855 					queue_reset = true;
7856 					break;
7857 				}
7858 				break;
7859 			}
7860 			break;
7861 		default:
7862 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7863 			break;
7864 		}
7865 
7866 		/* wptr/rptr are in bytes! */
7867 		rptr += 16;
7868 		rptr &= rdev->ih.ptr_mask;
7869 	}
7870 	if (queue_hotplug)
7871 		schedule_work(&rdev->hotplug_work);
7872 	if (queue_reset)
7873 		schedule_work(&rdev->reset_work);
7874 	if (queue_thermal)
7875 		schedule_work(&rdev->pm.dpm.thermal.work);
7876 	rdev->ih.rptr = rptr;
7877 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7878 	atomic_set(&rdev->ih.lock, 0);
7879 
7880 	/* make sure wptr hasn't changed while processing */
7881 	wptr = cik_get_ih_wptr(rdev);
7882 	if (wptr != rptr)
7883 		goto restart_ih;
7884 
7885 	return IRQ_HANDLED;
7886 }
7887 
7888 /*
7889  * startup/shutdown callbacks
7890  */
7891 /**
7892  * cik_startup - program the asic to a functional state
7893  *
7894  * @rdev: radeon_device pointer
7895  *
7896  * Programs the asic to a functional state (CIK).
7897  * Called by cik_init() and cik_resume().
7898  * Returns 0 for success, error for failure.
7899  */
7900 static int cik_startup(struct radeon_device *rdev)
7901 {
7902 	struct radeon_ring *ring;
7903 	int r;
7904 
7905 	/* enable pcie gen2/3 link */
7906 	cik_pcie_gen3_enable(rdev);
7907 	/* enable aspm */
7908 	cik_program_aspm(rdev);
7909 
7910 	/* scratch needs to be initialized before MC */
7911 	r = r600_vram_scratch_init(rdev);
7912 	if (r)
7913 		return r;
7914 
7915 	cik_mc_program(rdev);
7916 
7917 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7918 		r = ci_mc_load_microcode(rdev);
7919 		if (r) {
7920 			DRM_ERROR("Failed to load MC firmware!\n");
7921 			return r;
7922 		}
7923 	}
7924 
7925 	r = cik_pcie_gart_enable(rdev);
7926 	if (r)
7927 		return r;
7928 	cik_gpu_init(rdev);
7929 
7930 	/* allocate rlc buffers */
7931 	if (rdev->flags & RADEON_IS_IGP) {
7932 		if (rdev->family == CHIP_KAVERI) {
7933 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7934 			rdev->rlc.reg_list_size =
7935 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7936 		} else {
7937 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7938 			rdev->rlc.reg_list_size =
7939 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7940 		}
7941 	}
7942 	rdev->rlc.cs_data = ci_cs_data;
7943 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7944 	r = sumo_rlc_init(rdev);
7945 	if (r) {
7946 		DRM_ERROR("Failed to init rlc BOs!\n");
7947 		return r;
7948 	}
7949 
7950 	/* allocate wb buffer */
7951 	r = radeon_wb_init(rdev);
7952 	if (r)
7953 		return r;
7954 
7955 	/* allocate mec buffers */
7956 	r = cik_mec_init(rdev);
7957 	if (r) {
7958 		DRM_ERROR("Failed to init MEC BOs!\n");
7959 		return r;
7960 	}
7961 
7962 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7963 	if (r) {
7964 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7965 		return r;
7966 	}
7967 
7968 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7969 	if (r) {
7970 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7971 		return r;
7972 	}
7973 
7974 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7975 	if (r) {
7976 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7977 		return r;
7978 	}
7979 
7980 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7981 	if (r) {
7982 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7983 		return r;
7984 	}
7985 
7986 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7987 	if (r) {
7988 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7989 		return r;
7990 	}
7991 
7992 	r = radeon_uvd_resume(rdev);
7993 	if (!r) {
7994 		r = uvd_v4_2_resume(rdev);
7995 		if (!r) {
7996 			r = radeon_fence_driver_start_ring(rdev,
7997 							   R600_RING_TYPE_UVD_INDEX);
7998 			if (r)
7999 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8000 		}
8001 	}
8002 	if (r)
8003 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8004 
8005 	r = radeon_vce_resume(rdev);
8006 	if (!r) {
8007 		r = vce_v2_0_resume(rdev);
8008 		if (!r)
8009 			r = radeon_fence_driver_start_ring(rdev,
8010 							   TN_RING_TYPE_VCE1_INDEX);
8011 		if (!r)
8012 			r = radeon_fence_driver_start_ring(rdev,
8013 							   TN_RING_TYPE_VCE2_INDEX);
8014 	}
8015 	if (r) {
8016 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8017 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8018 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8019 	}
8020 
8021 	/* Enable IRQ */
8022 	if (!rdev->irq.installed) {
8023 		r = radeon_irq_kms_init(rdev);
8024 		if (r)
8025 			return r;
8026 	}
8027 
8028 	r = cik_irq_init(rdev);
8029 	if (r) {
8030 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8031 		radeon_irq_kms_fini(rdev);
8032 		return r;
8033 	}
8034 	cik_irq_set(rdev);
8035 
8036 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8037 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8038 			     PACKET3(PACKET3_NOP, 0x3FFF));
8039 	if (r)
8040 		return r;
8041 
8042 	/* set up the compute queues */
8043 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8044 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8045 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8046 			     PACKET3(PACKET3_NOP, 0x3FFF));
8047 	if (r)
8048 		return r;
8049 	ring->me = 1; /* first MEC */
8050 	ring->pipe = 0; /* first pipe */
8051 	ring->queue = 0; /* first queue */
8052 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8053 
8054 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8055 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8056 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8057 			     PACKET3(PACKET3_NOP, 0x3FFF));
8058 	if (r)
8059 		return r;
8060 	/* dGPUs only have one MEC */
8061 	ring->me = 1; /* first MEC */
8062 	ring->pipe = 0; /* first pipe */
8063 	ring->queue = 1; /* second queue */
8064 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8065 
8066 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8067 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8068 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8069 	if (r)
8070 		return r;
8071 
8072 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8073 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8074 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8075 	if (r)
8076 		return r;
8077 
8078 	r = cik_cp_resume(rdev);
8079 	if (r)
8080 		return r;
8081 
8082 	r = cik_sdma_resume(rdev);
8083 	if (r)
8084 		return r;
8085 
8086 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8087 	if (ring->ring_size) {
8088 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8089 				     RADEON_CP_PACKET2);
8090 		if (!r)
8091 			r = uvd_v1_0_init(rdev);
8092 		if (r)
8093 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8094 	}
8095 
8096 	r = -ENOENT;
8097 
8098 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8099 	if (ring->ring_size)
8100 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8101 				     VCE_CMD_NO_OP);
8102 
8103 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8104 	if (ring->ring_size)
8105 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8106 				     VCE_CMD_NO_OP);
8107 
8108 	if (!r)
8109 		r = vce_v1_0_init(rdev);
8110 	else if (r != -ENOENT)
8111 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8112 
8113 	r = radeon_ib_pool_init(rdev);
8114 	if (r) {
8115 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8116 		return r;
8117 	}
8118 
8119 	r = radeon_vm_manager_init(rdev);
8120 	if (r) {
8121 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8122 		return r;
8123 	}
8124 
8125 	r = dce6_audio_init(rdev);
8126 	if (r)
8127 		return r;
8128 
8129 	return 0;
8130 }
8131 
8132 /**
8133  * cik_resume - resume the asic to a functional state
8134  *
8135  * @rdev: radeon_device pointer
8136  *
8137  * Programs the asic to a functional state (CIK).
8138  * Called at resume.
8139  * Returns 0 for success, error for failure.
8140  */
8141 int cik_resume(struct radeon_device *rdev)
8142 {
8143 	int r;
8144 
8145 	/* post card */
8146 	atom_asic_init(rdev->mode_info.atom_context);
8147 
8148 	/* init golden registers */
8149 	cik_init_golden_registers(rdev);
8150 
8151 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8152 		radeon_pm_resume(rdev);
8153 
8154 	rdev->accel_working = true;
8155 	r = cik_startup(rdev);
8156 	if (r) {
8157 		DRM_ERROR("cik startup failed on resume\n");
8158 		rdev->accel_working = false;
8159 		return r;
8160 	}
8161 
8162 	return r;
8163 
8164 }
8165 
8166 /**
8167  * cik_suspend - suspend the asic
8168  *
8169  * @rdev: radeon_device pointer
8170  *
8171  * Bring the chip into a state suitable for suspend (CIK).
8172  * Called at suspend.
8173  * Returns 0 for success.
8174  */
8175 int cik_suspend(struct radeon_device *rdev)
8176 {
8177 	radeon_pm_suspend(rdev);
8178 	dce6_audio_fini(rdev);
8179 	radeon_vm_manager_fini(rdev);
8180 	cik_cp_enable(rdev, false);
8181 	cik_sdma_enable(rdev, false);
8182 	uvd_v1_0_fini(rdev);
8183 	radeon_uvd_suspend(rdev);
8184 	radeon_vce_suspend(rdev);
8185 	cik_fini_pg(rdev);
8186 	cik_fini_cg(rdev);
8187 	cik_irq_suspend(rdev);
8188 	radeon_wb_disable(rdev);
8189 	cik_pcie_gart_disable(rdev);
8190 	return 0;
8191 }
8192 
8193 /* The plan is to move initialization into this function and use
8194  * helper functions so that radeon_device_init does pretty much
8195  * nothing more than call asic-specific functions. This should
8196  * also allow us to remove a bunch of callback functions
8197  * like vram_info.
8198  */
8199 /**
8200  * cik_init - asic specific driver and hw init
8201  *
8202  * @rdev: radeon_device pointer
8203  *
8204  * Setup asic specific driver variables and program the hw
8205  * to a functional state (CIK).
8206  * Called at driver startup.
8207  * Returns 0 for success, errors for failure.
8208  */
8209 int cik_init(struct radeon_device *rdev)
8210 {
8211 	struct radeon_ring *ring;
8212 	int r;
8213 
8214 	/* Read BIOS */
8215 	if (!radeon_get_bios(rdev)) {
8216 		if (ASIC_IS_AVIVO(rdev))
8217 			return -EINVAL;
8218 	}
8219 	/* Must be an ATOMBIOS */
8220 	if (!rdev->is_atom_bios) {
8221 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8222 		return -EINVAL;
8223 	}
8224 	r = radeon_atombios_init(rdev);
8225 	if (r)
8226 		return r;
8227 
8228 	/* Post card if necessary */
8229 	if (!radeon_card_posted(rdev)) {
8230 		if (!rdev->bios) {
8231 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8232 			return -EINVAL;
8233 		}
8234 		DRM_INFO("GPU not posted. posting now...\n");
8235 		atom_asic_init(rdev->mode_info.atom_context);
8236 	}
8237 	/* init golden registers */
8238 	cik_init_golden_registers(rdev);
8239 	/* Initialize scratch registers */
8240 	cik_scratch_init(rdev);
8241 	/* Initialize surface registers */
8242 	radeon_surface_init(rdev);
8243 	/* Initialize clocks */
8244 	radeon_get_clock_info(rdev->ddev);
8245 
8246 	/* Fence driver */
8247 	r = radeon_fence_driver_init(rdev);
8248 	if (r)
8249 		return r;
8250 
8251 	/* initialize memory controller */
8252 	r = cik_mc_init(rdev);
8253 	if (r)
8254 		return r;
8255 	/* Memory manager */
8256 	r = radeon_bo_init(rdev);
8257 	if (r)
8258 		return r;
8259 
8260 	if (rdev->flags & RADEON_IS_IGP) {
8261 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8262 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8263 			r = cik_init_microcode(rdev);
8264 			if (r) {
8265 				DRM_ERROR("Failed to load firmware!\n");
8266 				return r;
8267 			}
8268 		}
8269 	} else {
8270 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8271 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8272 		    !rdev->mc_fw) {
8273 			r = cik_init_microcode(rdev);
8274 			if (r) {
8275 				DRM_ERROR("Failed to load firmware!\n");
8276 				return r;
8277 			}
8278 		}
8279 	}
8280 
8281 	/* Initialize power management */
8282 	radeon_pm_init(rdev);
8283 
8284 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8285 	ring->ring_obj = NULL;
8286 	r600_ring_init(rdev, ring, 1024 * 1024);
8287 
8288 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8289 	ring->ring_obj = NULL;
8290 	r600_ring_init(rdev, ring, 1024 * 1024);
8291 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8292 	if (r)
8293 		return r;
8294 
8295 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8296 	ring->ring_obj = NULL;
8297 	r600_ring_init(rdev, ring, 1024 * 1024);
8298 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8299 	if (r)
8300 		return r;
8301 
8302 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8303 	ring->ring_obj = NULL;
8304 	r600_ring_init(rdev, ring, 256 * 1024);
8305 
8306 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8307 	ring->ring_obj = NULL;
8308 	r600_ring_init(rdev, ring, 256 * 1024);
8309 
8310 	r = radeon_uvd_init(rdev);
8311 	if (!r) {
8312 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8313 		ring->ring_obj = NULL;
8314 		r600_ring_init(rdev, ring, 4096);
8315 	}
8316 
8317 	r = radeon_vce_init(rdev);
8318 	if (!r) {
8319 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8320 		ring->ring_obj = NULL;
8321 		r600_ring_init(rdev, ring, 4096);
8322 
8323 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8324 		ring->ring_obj = NULL;
8325 		r600_ring_init(rdev, ring, 4096);
8326 	}
8327 
8328 	rdev->ih.ring_obj = NULL;
8329 	r600_ih_ring_init(rdev, 64 * 1024);
8330 
8331 	r = r600_pcie_gart_init(rdev);
8332 	if (r)
8333 		return r;
8334 
8335 	rdev->accel_working = true;
8336 	r = cik_startup(rdev);
8337 	if (r) {
8338 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8339 		cik_cp_fini(rdev);
8340 		cik_sdma_fini(rdev);
8341 		cik_irq_fini(rdev);
8342 		sumo_rlc_fini(rdev);
8343 		cik_mec_fini(rdev);
8344 		radeon_wb_fini(rdev);
8345 		radeon_ib_pool_fini(rdev);
8346 		radeon_vm_manager_fini(rdev);
8347 		radeon_irq_kms_fini(rdev);
8348 		cik_pcie_gart_fini(rdev);
8349 		rdev->accel_working = false;
8350 	}
8351 
8352 	/* Don't start up if the MC ucode is missing.
8353 	 * The default clocks and voltages before the MC ucode
8354 	 * is loaded are not sufficient for advanced operations.
8355 	 */
8356 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8357 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8358 		return -EINVAL;
8359 	}
8360 
8361 	return 0;
8362 }
8363 
8364 /**
8365  * cik_fini - asic specific driver and hw fini
8366  *
8367  * @rdev: radeon_device pointer
8368  *
8369  * Tear down the asic specific driver variables and program the hw
8370  * to an idle state (CIK).
8371  * Called at driver unload.
8372  */
8373 void cik_fini(struct radeon_device *rdev)
8374 {
8375 	radeon_pm_fini(rdev);
8376 	cik_cp_fini(rdev);
8377 	cik_sdma_fini(rdev);
8378 	cik_fini_pg(rdev);
8379 	cik_fini_cg(rdev);
8380 	cik_irq_fini(rdev);
8381 	sumo_rlc_fini(rdev);
8382 	cik_mec_fini(rdev);
8383 	radeon_wb_fini(rdev);
8384 	radeon_vm_manager_fini(rdev);
8385 	radeon_ib_pool_fini(rdev);
8386 	radeon_irq_kms_fini(rdev);
8387 	uvd_v1_0_fini(rdev);
8388 	radeon_uvd_fini(rdev);
8389 	radeon_vce_fini(rdev);
8390 	cik_pcie_gart_fini(rdev);
8391 	r600_vram_scratch_fini(rdev);
8392 	radeon_gem_fini(rdev);
8393 	radeon_fence_driver_fini(rdev);
8394 	radeon_bo_fini(rdev);
8395 	radeon_atombios_fini(rdev);
8396 	kfree(rdev->bios);
8397 	rdev->bios = NULL;
8398 }
8399 
8400 void dce8_program_fmt(struct drm_encoder *encoder)
8401 {
8402 	struct drm_device *dev = encoder->dev;
8403 	struct radeon_device *rdev = dev->dev_private;
8404 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8405 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8406 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8407 	int bpc = 0;
8408 	u32 tmp = 0;
8409 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8410 
8411 	if (connector) {
8412 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8413 		bpc = radeon_get_monitor_bpc(connector);
8414 		dither = radeon_connector->dither;
8415 	}
8416 
8417 	/* LVDS/eDP FMT is set up by atom */
8418 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8419 		return;
8420 
8421 	/* not needed for analog */
8422 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8423 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8424 		return;
8425 
8426 	if (bpc == 0)
8427 		return;
8428 
8429 	switch (bpc) {
8430 	case 6:
8431 		if (dither == RADEON_FMT_DITHER_ENABLE)
8432 			/* XXX sort out optimal dither settings */
8433 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8434 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8435 		else
8436 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8437 		break;
8438 	case 8:
8439 		if (dither == RADEON_FMT_DITHER_ENABLE)
8440 			/* XXX sort out optimal dither settings */
8441 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8442 				FMT_RGB_RANDOM_ENABLE |
8443 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8444 		else
8445 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8446 		break;
8447 	case 10:
8448 		if (dither == RADEON_FMT_DITHER_ENABLE)
8449 			/* XXX sort out optimal dither settings */
8450 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8451 				FMT_RGB_RANDOM_ENABLE |
8452 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8453 		else
8454 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8455 		break;
8456 	default:
8457 		/* not needed */
8458 		break;
8459 	}
8460 
8461 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8462 }
8463 
8464 /* display watermark setup */
8465 /**
8466  * dce8_line_buffer_adjust - Set up the line buffer
8467  *
8468  * @rdev: radeon_device pointer
8469  * @radeon_crtc: the selected display controller
8470  * @mode: the current display mode on the selected display
8471  * controller
8472  *
8473  * Set up the line buffer allocation for
8474  * the selected display controller (CIK).
8475  * Returns the line buffer size in pixels.
8476  */
8477 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8478 				   struct radeon_crtc *radeon_crtc,
8479 				   struct drm_display_mode *mode)
8480 {
8481 	u32 tmp, buffer_alloc, i;
8482 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8483 	/*
8484 	 * Line Buffer Setup
8485 	 * There are 6 line buffers, one for each display controller.
8486 	 * There are 3 partitions per LB. Select the number of partitions
8487 	 * to enable based on the display width.  For display widths larger
8488 	 * than 4096, you need to use 2 display controllers and combine
8489 	 * them using the stereo blender.
8490 	 */
8491 	if (radeon_crtc->base.enabled && mode) {
8492 		if (mode->crtc_hdisplay < 1920) {
8493 			tmp = 1;
8494 			buffer_alloc = 2;
8495 		} else if (mode->crtc_hdisplay < 2560) {
8496 			tmp = 2;
8497 			buffer_alloc = 2;
8498 		} else if (mode->crtc_hdisplay < 4096) {
8499 			tmp = 0;
8500 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8501 		} else {
8502 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8503 			tmp = 0;
8504 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8505 		}
8506 	} else {
8507 		tmp = 1;
8508 		buffer_alloc = 0;
8509 	}
8510 
8511 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8512 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8513 
8514 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8515 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8516 	for (i = 0; i < rdev->usec_timeout; i++) {
8517 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8518 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8519 			break;
8520 		udelay(1);
8521 	}
8522 
8523 	if (radeon_crtc->base.enabled && mode) {
8524 		switch (tmp) {
8525 		case 0:
8526 		default:
8527 			return 4096 * 2;
8528 		case 1:
8529 			return 1920 * 2;
8530 		case 2:
8531 			return 2560 * 2;
8532 		}
8533 	}
8534 
8535 	/* controller not enabled, so no lb used */
8536 	return 0;
8537 }
8538 
8539 /**
8540  * cik_get_number_of_dram_channels - get the number of dram channels
8541  *
8542  * @rdev: radeon_device pointer
8543  *
8544  * Look up the number of video ram channels (CIK).
8545  * Used for display watermark bandwidth calculations
8546  * Returns the number of dram channels
8547  */
8548 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8549 {
8550 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8551 
8552 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8553 	case 0:
8554 	default:
8555 		return 1;
8556 	case 1:
8557 		return 2;
8558 	case 2:
8559 		return 4;
8560 	case 3:
8561 		return 8;
8562 	case 4:
8563 		return 3;
8564 	case 5:
8565 		return 6;
8566 	case 6:
8567 		return 10;
8568 	case 7:
8569 		return 12;
8570 	case 8:
8571 		return 16;
8572 	}
8573 }
8574 
8575 struct dce8_wm_params {
8576 	u32 dram_channels; /* number of dram channels */
8577 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8578 	u32 sclk;          /* engine clock in kHz */
8579 	u32 disp_clk;      /* display clock in kHz */
8580 	u32 src_width;     /* viewport width */
8581 	u32 active_time;   /* active display time in ns */
8582 	u32 blank_time;    /* blank time in ns */
8583 	bool interlaced;    /* mode is interlaced */
8584 	fixed20_12 vsc;    /* vertical scale ratio */
8585 	u32 num_heads;     /* number of active crtcs */
8586 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8587 	u32 lb_size;       /* line buffer allocated to pipe */
8588 	u32 vtaps;         /* vertical scaler taps */
8589 };
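
/* Conventions for dce8_wm_params: the clocks (yclk, sclk, disp_clk) are in
 * kHz, the times (active_time, blank_time) in ns, and vsc is a 20.12
 * fixed-point ratio.  dce8_program_watermarks() below fills one instance
 * for the high power state (watermark A) and one for the low power state
 * (watermark B).
 */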
8590 
8591 /**
8592  * dce8_dram_bandwidth - get the dram bandwidth
8593  *
8594  * @wm: watermark calculation data
8595  *
8596  * Calculate the raw dram bandwidth (CIK).
8597  * Used for display watermark bandwidth calculations
8598  * Returns the dram bandwidth in MBytes/s
8599  */
8600 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8601 {
8602 	/* Calculate raw DRAM Bandwidth */
8603 	fixed20_12 dram_efficiency; /* 0.7 */
8604 	fixed20_12 yclk, dram_channels, bandwidth;
8605 	fixed20_12 a;
8606 
8607 	a.full = dfixed_const(1000);
8608 	yclk.full = dfixed_const(wm->yclk);
8609 	yclk.full = dfixed_div(yclk, a);
8610 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8611 	a.full = dfixed_const(10);
8612 	dram_efficiency.full = dfixed_const(7);
8613 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8614 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8615 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8616 
8617 	return dfixed_trunc(bandwidth);
8618 }
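
/* Worked example (illustrative numbers, not from any datasheet): with
 * wm->yclk = 1000000 (a 1 GHz effective DRAM clock) and 2 channels, each
 * treated as 4 bytes wide, the raw bandwidth comes out as
 * (1000000 / 1000) * (2 * 4) * 0.7 = 5600 MBytes/s.  The _for_display()
 * variant below is identical except that the 0.7 efficiency is replaced
 * by the worst-case 0.3 share of DRAM granted to the display.
 */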
8619 
8620 /**
8621  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8622  *
8623  * @wm: watermark calculation data
8624  *
8625  * Calculate the dram bandwidth used for display (CIK).
8626  * Used for display watermark bandwidth calculations
8627  * Returns the dram bandwidth for display in MBytes/s
8628  */
8629 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8630 {
8631 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8632 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8633 	fixed20_12 yclk, dram_channels, bandwidth;
8634 	fixed20_12 a;
8635 
8636 	a.full = dfixed_const(1000);
8637 	yclk.full = dfixed_const(wm->yclk);
8638 	yclk.full = dfixed_div(yclk, a);
8639 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8640 	a.full = dfixed_const(10);
8641 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8642 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8643 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8644 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8645 
8646 	return dfixed_trunc(bandwidth);
8647 }
8648 
8649 /**
8650  * dce8_data_return_bandwidth - get the data return bandwidth
8651  *
8652  * @wm: watermark calculation data
8653  *
8654  * Calculate the data return bandwidth used for display (CIK).
8655  * Used for display watermark bandwidth calculations
8656  * Returns the data return bandwidth in MBytes/s
8657  */
8658 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8659 {
8660 	/* Calculate the display Data return Bandwidth */
8661 	fixed20_12 return_efficiency; /* 0.8 */
8662 	fixed20_12 sclk, bandwidth;
8663 	fixed20_12 a;
8664 
8665 	a.full = dfixed_const(1000);
8666 	sclk.full = dfixed_const(wm->sclk);
8667 	sclk.full = dfixed_div(sclk, a);
8668 	a.full = dfixed_const(10);
8669 	return_efficiency.full = dfixed_const(8);
8670 	return_efficiency.full = dfixed_div(return_efficiency, a);
8671 	a.full = dfixed_const(32);
8672 	bandwidth.full = dfixed_mul(a, sclk);
8673 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8674 
8675 	return dfixed_trunc(bandwidth);
8676 }
8677 
8678 /**
8679  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8680  *
8681  * @wm: watermark calculation data
8682  *
8683  * Calculate the dmif bandwidth used for display (CIK).
8684  * Used for display watermark bandwidth calculations
8685  * Returns the dmif bandwidth in MBytes/s
8686  */
8687 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8688 {
8689 	/* Calculate the DMIF Request Bandwidth */
8690 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8691 	fixed20_12 disp_clk, bandwidth;
8692 	fixed20_12 a, b;
8693 
8694 	a.full = dfixed_const(1000);
8695 	disp_clk.full = dfixed_const(wm->disp_clk);
8696 	disp_clk.full = dfixed_div(disp_clk, a);
8697 	a.full = dfixed_const(32);
8698 	b.full = dfixed_mul(a, disp_clk);
8699 
8700 	a.full = dfixed_const(10);
8701 	disp_clk_request_efficiency.full = dfixed_const(8);
8702 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8703 
8704 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8705 
8706 	return dfixed_trunc(bandwidth);
8707 }
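
/* dce8_data_return_bandwidth() and dce8_dmif_request_bandwidth() both model
 * a 32-byte transfer per clock at 80% efficiency and differ only in the
 * clock they scale with (sclk vs. disp_clk).  Illustrative example: an
 * 800 MHz clock yields (800000 / 1000) * 32 * 0.8 = 20480 MBytes/s.
 */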
8708 
8709 /**
8710  * dce8_available_bandwidth - get the min available bandwidth
8711  *
8712  * @wm: watermark calculation data
8713  *
8714  * Calculate the min available bandwidth used for display (CIK).
8715  * Used for display watermark bandwidth calculations
8716  * Returns the min available bandwidth in MBytes/s
8717  */
8718 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8719 {
8720 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8721 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8722 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8723 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8724 
8725 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8726 }
8727 
8728 /**
8729  * dce8_average_bandwidth - get the average available bandwidth
8730  *
8731  * @wm: watermark calculation data
8732  *
8733  * Calculate the average available bandwidth used for display (CIK).
8734  * Used for display watermark bandwidth calculations
8735  * Returns the average available bandwidth in MBytes/s
8736  */
8737 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8738 {
8739 	/* Calculate the display mode Average Bandwidth
8740 	 * DisplayMode should contain the source and destination dimensions,
8741 	 * timing, etc.
8742 	 */
8743 	fixed20_12 bpp;
8744 	fixed20_12 line_time;
8745 	fixed20_12 src_width;
8746 	fixed20_12 bandwidth;
8747 	fixed20_12 a;
8748 
8749 	a.full = dfixed_const(1000);
8750 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8751 	line_time.full = dfixed_div(line_time, a);
8752 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8753 	src_width.full = dfixed_const(wm->src_width);
8754 	bandwidth.full = dfixed_mul(src_width, bpp);
8755 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8756 	bandwidth.full = dfixed_div(bandwidth, line_time);
8757 
8758 	return dfixed_trunc(bandwidth);
8759 }
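
/* The average bandwidth is the data one source line contributes per line
 * period.  Illustrative numbers: a 1920-wide source at 4 bytes per pixel
 * with vsc = 1 and a 16000 ns line time needs
 * 1920 * 4 * 1 / 16 = 480 MBytes/s.
 */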
8760 
8761 /**
8762  * dce8_latency_watermark - get the latency watermark
8763  *
8764  * @wm: watermark calculation data
8765  *
8766  * Calculate the latency watermark (CIK).
8767  * Used for display watermark bandwidth calculations
8768  * Returns the latency watermark in ns
8769  */
8770 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8771 {
8772 	/* First calculate the latency in ns */
8773 	u32 mc_latency = 2000; /* 2000 ns. */
8774 	u32 available_bandwidth = dce8_available_bandwidth(wm);
8775 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8776 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8777 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8778 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8779 		(wm->num_heads * cursor_line_pair_return_time);
8780 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8781 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8782 	u32 tmp, dmif_size = 12288;
8783 	fixed20_12 a, b, c;
8784 
8785 	if (wm->num_heads == 0)
8786 		return 0;
8787 
8788 	a.full = dfixed_const(2);
8789 	b.full = dfixed_const(1);
8790 	if ((wm->vsc.full > a.full) ||
8791 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8792 	    (wm->vtaps >= 5) ||
8793 	    ((wm->vsc.full >= a.full) && wm->interlaced))
8794 		max_src_lines_per_dst_line = 4;
8795 	else
8796 		max_src_lines_per_dst_line = 2;
8797 
8798 	a.full = dfixed_const(available_bandwidth);
8799 	b.full = dfixed_const(wm->num_heads);
8800 	a.full = dfixed_div(a, b);
8801 
8802 	b.full = dfixed_const(mc_latency + 512);
8803 	c.full = dfixed_const(wm->disp_clk);
8804 	b.full = dfixed_div(b, c);
8805 
8806 	c.full = dfixed_const(dmif_size);
8807 	b.full = dfixed_div(c, b);
8808 
8809 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8810 
8811 	b.full = dfixed_const(1000);
8812 	c.full = dfixed_const(wm->disp_clk);
8813 	b.full = dfixed_div(c, b);
8814 	c.full = dfixed_const(wm->bytes_per_pixel);
8815 	b.full = dfixed_mul(b, c);
8816 
8817 	lb_fill_bw = min(tmp, dfixed_trunc(b));
8818 
8819 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8820 	b.full = dfixed_const(1000);
8821 	c.full = dfixed_const(lb_fill_bw);
8822 	b.full = dfixed_div(c, b);
8823 	a.full = dfixed_div(a, b);
8824 	line_fill_time = dfixed_trunc(a);
8825 
8826 	if (line_fill_time < wm->active_time)
8827 		return latency;
8828 	else
8829 		return latency + (line_fill_time - wm->active_time);
8830 
8831 }
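
/* The returned watermark is the time the display must be able to ride out
 * without new data: a fixed 2000 ns MC latency, a DC pipe latency derived
 * from the display clock, and the time other active heads can occupy the
 * return path (worst-case 512 byte * 8 chunks plus cursor line pairs).
 * Illustrative number: at 20480 MBytes/s of available bandwidth one worst
 * case chunk takes 512 * 8 * 1000 / 20480 = 200 ns.  If the line buffer
 * cannot refill a line within the active time, the shortfall is added on
 * top of the latency.
 */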
8832 
8833 /**
8834  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8835  * average and available dram bandwidth
8836  *
8837  * @wm: watermark calculation data
8838  *
8839  * Check if the display average bandwidth fits in the display
8840  * dram bandwidth (CIK).
8841  * Used for display watermark bandwidth calculations
8842  * Returns true if the display fits, false if not.
8843  */
8844 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8845 {
8846 	if (dce8_average_bandwidth(wm) <=
8847 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8848 		return true;
8849 	else
8850 		return false;
8851 }
8852 
8853 /**
8854  * dce8_average_bandwidth_vs_available_bandwidth - check
8855  * average and available bandwidth
8856  *
8857  * @wm: watermark calculation data
8858  *
8859  * Check if the display average bandwidth fits in the display
8860  * available bandwidth (CIK).
8861  * Used for display watermark bandwidth calculations
8862  * Returns true if the display fits, false if not.
8863  */
8864 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8865 {
8866 	if (dce8_average_bandwidth(wm) <=
8867 	    (dce8_available_bandwidth(wm) / wm->num_heads))
8868 		return true;
8869 	else
8870 		return false;
8871 }
8872 
8873 /**
8874  * dce8_check_latency_hiding - check latency hiding
8875  *
8876  * @wm: watermark calculation data
8877  *
8878  * Check latency hiding (CIK).
8879  * Used for display watermark bandwidth calculations
8880  * Returns true if the display fits, false if not.
8881  */
8882 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8883 {
8884 	u32 lb_partitions = wm->lb_size / wm->src_width;
8885 	u32 line_time = wm->active_time + wm->blank_time;
8886 	u32 latency_tolerant_lines;
8887 	u32 latency_hiding;
8888 	fixed20_12 a;
8889 
8890 	a.full = dfixed_const(1);
8891 	if (wm->vsc.full > a.full)
8892 		latency_tolerant_lines = 1;
8893 	else {
8894 		if (lb_partitions <= (wm->vtaps + 1))
8895 			latency_tolerant_lines = 1;
8896 		else
8897 			latency_tolerant_lines = 2;
8898 	}
8899 
8900 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8901 
8902 	if (dce8_latency_watermark(wm) <= latency_hiding)
8903 		return true;
8904 	else
8905 		return false;
8906 }
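
/* Latency hiding: the line buffer can absorb latency_tolerant_lines full
 * line times plus the blank time.  Only one line is tolerated when
 * downscaling (vsc > 1) or when too few LB partitions remain relative to
 * the scaler taps; otherwise two.  The mode fits if the latency watermark
 * computed above stays within that budget.
 */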
8907 
8908 /**
8909  * dce8_program_watermarks - program display watermarks
8910  *
8911  * @rdev: radeon_device pointer
8912  * @radeon_crtc: the selected display controller
8913  * @lb_size: line buffer size
8914  * @num_heads: number of display controllers in use
8915  *
8916  * Calculate and program the display watermarks for the
8917  * selected display controller (CIK).
8918  */
8919 static void dce8_program_watermarks(struct radeon_device *rdev,
8920 				    struct radeon_crtc *radeon_crtc,
8921 				    u32 lb_size, u32 num_heads)
8922 {
8923 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8924 	struct dce8_wm_params wm_low, wm_high;
8925 	u32 pixel_period;
8926 	u32 line_time = 0;
8927 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8928 	u32 tmp, wm_mask;
8929 
8930 	if (radeon_crtc->base.enabled && num_heads && mode) {
8931 		pixel_period = 1000000 / (u32)mode->clock; /* ns; mode->clock is in kHz */
8932 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8933 
8934 		/* watermark for high clocks */
8935 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8936 		    rdev->pm.dpm_enabled) {
8937 			wm_high.yclk =
8938 				radeon_dpm_get_mclk(rdev, false) * 10;
8939 			wm_high.sclk =
8940 				radeon_dpm_get_sclk(rdev, false) * 10;
8941 		} else {
8942 			wm_high.yclk = rdev->pm.current_mclk * 10;
8943 			wm_high.sclk = rdev->pm.current_sclk * 10;
8944 		}
8945 
8946 		wm_high.disp_clk = mode->clock;
8947 		wm_high.src_width = mode->crtc_hdisplay;
8948 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8949 		wm_high.blank_time = line_time - wm_high.active_time;
8950 		wm_high.interlaced = false;
8951 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8952 			wm_high.interlaced = true;
8953 		wm_high.vsc = radeon_crtc->vsc;
8954 		wm_high.vtaps = 1;
8955 		if (radeon_crtc->rmx_type != RMX_OFF)
8956 			wm_high.vtaps = 2;
8957 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8958 		wm_high.lb_size = lb_size;
8959 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8960 		wm_high.num_heads = num_heads;
8961 
8962 		/* set for high clocks */
8963 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8964 
8965 		/* possibly force display priority to high */
8966 		/* should really do this at mode validation time... */
8967 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8968 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8969 		    !dce8_check_latency_hiding(&wm_high) ||
8970 		    (rdev->disp_priority == 2)) {
8971 			DRM_DEBUG_KMS("force priority to high\n");
8972 		}
8973 
8974 		/* watermark for low clocks */
8975 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8976 		    rdev->pm.dpm_enabled) {
8977 			wm_low.yclk =
8978 				radeon_dpm_get_mclk(rdev, true) * 10;
8979 			wm_low.sclk =
8980 				radeon_dpm_get_sclk(rdev, true) * 10;
8981 		} else {
8982 			wm_low.yclk = rdev->pm.current_mclk * 10;
8983 			wm_low.sclk = rdev->pm.current_sclk * 10;
8984 		}
8985 
8986 		wm_low.disp_clk = mode->clock;
8987 		wm_low.src_width = mode->crtc_hdisplay;
8988 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8989 		wm_low.blank_time = line_time - wm_low.active_time;
8990 		wm_low.interlaced = false;
8991 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8992 			wm_low.interlaced = true;
8993 		wm_low.vsc = radeon_crtc->vsc;
8994 		wm_low.vtaps = 1;
8995 		if (radeon_crtc->rmx_type != RMX_OFF)
8996 			wm_low.vtaps = 2;
8997 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8998 		wm_low.lb_size = lb_size;
8999 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9000 		wm_low.num_heads = num_heads;
9001 
9002 		/* set for low clocks */
9003 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9004 
9005 		/* possibly force display priority to high */
9006 		/* should really do this at mode validation time... */
9007 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9008 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9009 		    !dce8_check_latency_hiding(&wm_low) ||
9010 		    (rdev->disp_priority == 2)) {
9011 			DRM_DEBUG_KMS("force priority to high\n");
9012 		}
9013 	}
9014 
9015 	/* select wm A */
9016 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9017 	tmp = wm_mask;
9018 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9019 	tmp |= LATENCY_WATERMARK_MASK(1);
9020 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9021 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9022 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9023 		LATENCY_HIGH_WATERMARK(line_time)));
9024 	/* select wm B */
9025 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9026 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9027 	tmp |= LATENCY_WATERMARK_MASK(2);
9028 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9029 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9030 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9031 		LATENCY_HIGH_WATERMARK(line_time)));
9032 	/* restore original selection */
9033 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9034 
9035 	/* save values for DPM */
9036 	radeon_crtc->line_time = line_time;
9037 	radeon_crtc->wm_high = latency_watermark_a;
9038 	radeon_crtc->wm_low = latency_watermark_b;
9039 }
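
/* DPG_WATERMARK_MASK_CONTROL selects which watermark set the following
 * DPG_PIPE_LATENCY_CONTROL write lands in, so the sequence above programs
 * watermark A with the high-clock value and watermark B with the low-clock
 * value, then restores the original selection.  line_time, wm_high and
 * wm_low are kept on the crtc for the DPM code.
 */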
9040 
9041 /**
9042  * dce8_bandwidth_update - program display watermarks
9043  *
9044  * @rdev: radeon_device pointer
9045  *
9046  * Calculate and program the display watermarks and line
9047  * buffer allocation (CIK).
9048  */
9049 void dce8_bandwidth_update(struct radeon_device *rdev)
9050 {
9051 	struct drm_display_mode *mode = NULL;
9052 	u32 num_heads = 0, lb_size;
9053 	int i;
9054 
9055 	radeon_update_display_priority(rdev);
9056 
9057 	for (i = 0; i < rdev->num_crtc; i++) {
9058 		if (rdev->mode_info.crtcs[i]->base.enabled)
9059 			num_heads++;
9060 	}
9061 	for (i = 0; i < rdev->num_crtc; i++) {
9062 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9063 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9064 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9065 	}
9066 }
9067 
9068 /**
9069  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9070  *
9071  * @rdev: radeon_device pointer
9072  *
9073  * Fetches a GPU clock counter snapshot (CIK).
9074  * Returns the 64 bit clock counter snapshot.
9075  */
9076 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9077 {
9078 	uint64_t clock;
9079 
9080 	mutex_lock(&rdev->gpu_clock_mutex);
9081 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9082 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9083 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9084 	mutex_unlock(&rdev->gpu_clock_mutex);
9085 	return clock;
9086 }
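
/* Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the 64-bit counter so the
 * LSB/MSB pair reads back as one consistent snapshot; gpu_clock_mutex
 * keeps a concurrent caller from re-latching between the two reads.
 */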
9087 
9088 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9089 			     u32 cntl_reg, u32 status_reg)
9090 {
9091 	int r, i;
9092 	struct atom_clock_dividers dividers;
9093 	uint32_t tmp;
9094 
9095 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9096 					   clock, false, &dividers);
9097 	if (r)
9098 		return r;
9099 
9100 	tmp = RREG32_SMC(cntl_reg);
9101 	tmp &= ~(DCLK_DIR_CNTL_EN | DCLK_DIVIDER_MASK);
9102 	tmp |= dividers.post_divider;
9103 	WREG32_SMC(cntl_reg, tmp);
9104 
9105 	for (i = 0; i < 100; i++) {
9106 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9107 			break;
9108 		mdelay(10);
9109 	}
9110 	if (i == 100)
9111 		return -ETIMEDOUT;
9112 
9113 	return 0;
9114 }
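
/* cik_set_uvd_clock() is the common helper for both UVD clocks: it asks
 * the ATOM tables for dividers matching the requested clock, programs the
 * post divider (the DCLK_* masks are reused for VCLK via the cntl_reg/
 * status_reg parameters) and then polls the status bit for up to
 * 100 * 10 ms = 1 s before giving up with -ETIMEDOUT.
 */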
9115 
9116 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9117 {
9118 	int r = 0;
9119 
9120 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9121 	if (r)
9122 		return r;
9123 
9124 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9125 	return r;
9126 }
9127 
9128 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9129 {
9130 	int r, i;
9131 	struct atom_clock_dividers dividers;
9132 	u32 tmp;
9133 
9134 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9135 					   ecclk, false, &dividers);
9136 	if (r)
9137 		return r;
9138 
9139 	for (i = 0; i < 100; i++) {
9140 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9141 			break;
9142 		mdelay(10);
9143 	}
9144 	if (i == 100)
9145 		return -ETIMEDOUT;
9146 
9147 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9148 	tmp &= ~(ECLK_DIR_CNTL_EN | ECLK_DIVIDER_MASK);
9149 	tmp |= dividers.post_divider;
9150 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9151 
9152 	for (i = 0; i < 100; i++) {
9153 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9154 			break;
9155 		mdelay(10);
9156 	}
9157 	if (i == 100)
9158 		return -ETIMEDOUT;
9159 
9160 	return 0;
9161 }
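
/* Note the double poll in cik_set_vce_clocks(): ECLK_STATUS is waited on
 * once before touching CG_ECLK_CNTL, so that a previous change has
 * settled, and once after, so that the new divider has taken effect
 * before returning.
 */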
9162 
9163 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9164 {
9165 	struct pci_dev *root = rdev->pdev->bus->self;
9166 	int bridge_pos, gpu_pos;
9167 	u32 speed_cntl, mask, current_data_rate;
9168 	int ret, i;
9169 	u16 tmp16;
9170 
9171 	if (radeon_pcie_gen2 == 0)
9172 		return;
9173 
9174 	if (rdev->flags & RADEON_IS_IGP)
9175 		return;
9176 
9177 	if (!(rdev->flags & RADEON_IS_PCIE))
9178 		return;
9179 
9180 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9181 	if (ret != 0)
9182 		return;
9183 
9184 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9185 		return;
9186 
9187 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9188 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9189 		LC_CURRENT_DATA_RATE_SHIFT;
9190 	if (mask & DRM_PCIE_SPEED_80) {
9191 		if (current_data_rate == 2) {
9192 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9193 			return;
9194 		}
9195 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9196 	} else if (mask & DRM_PCIE_SPEED_50) {
9197 		if (current_data_rate == 1) {
9198 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9199 			return;
9200 		}
9201 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9202 	}
9203 
9204 	bridge_pos = pci_pcie_cap(root);
9205 	if (!bridge_pos)
9206 		return;
9207 
9208 	gpu_pos = pci_pcie_cap(rdev->pdev);
9209 	if (!gpu_pos)
9210 		return;
9211 
9212 	if (mask & DRM_PCIE_SPEED_80) {
9213 		/* re-try equalization if gen3 is not already enabled */
9214 		if (current_data_rate != 2) {
9215 			u16 bridge_cfg, gpu_cfg;
9216 			u16 bridge_cfg2, gpu_cfg2;
9217 			u32 max_lw, current_lw, tmp;
9218 
9219 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9220 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9221 
9222 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9223 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9224 
9225 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9226 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9227 
9228 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9229 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9230 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9231 
9232 			if (current_lw < max_lw) {
9233 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9234 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9235 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9236 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9237 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9238 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9239 				}
9240 			}
9241 
9242 			for (i = 0; i < 10; i++) {
9243 				/* check status */
9244 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9245 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9246 					break;
9247 
9248 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9249 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9250 
9251 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9252 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9253 
9254 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9255 				tmp |= LC_SET_QUIESCE;
9256 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9257 
9258 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9259 				tmp |= LC_REDO_EQ;
9260 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9261 
9262 				mdelay(100);
9263 
9264 				/* linkctl */
9265 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9266 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9267 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9268 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9269 
9270 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9271 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9272 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9273 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9274 
9275 				/* linkctl2 */
9276 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9277 				tmp16 &= ~((1 << 4) | (7 << 9));
9278 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9279 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9280 
9281 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9282 				tmp16 &= ~((1 << 4) | (7 << 9));
9283 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9284 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9285 
9286 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9287 				tmp &= ~LC_SET_QUIESCE;
9288 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9289 			}
9290 		}
9291 	}
9292 
9293 	/* set the link speed */
9294 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9295 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9296 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9297 
9298 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9299 	tmp16 &= ~0xf; /* clear the target link speed */
9300 	if (mask & DRM_PCIE_SPEED_80)
9301 		tmp16 |= 3; /* gen3 (8.0 GT/s) */
9302 	else if (mask & DRM_PCIE_SPEED_50)
9303 		tmp16 |= 2; /* gen2 (5.0 GT/s) */
9304 	else
9305 		tmp16 |= 1; /* gen1 (2.5 GT/s) */
9306 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9307 
9308 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9309 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9310 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9311 
9312 	for (i = 0; i < rdev->usec_timeout; i++) {
9313 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9314 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9315 			break;
9316 		udelay(1);
9317 	}
9318 }
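
/* Gen3 speeds require link equalization.  The retry loop above quiesces
 * the link (LC_SET_QUIESCE), re-runs equalization (LC_REDO_EQ) up to ten
 * times while restoring the saved LNKCTL/LNKCTL2 fields on both bridge
 * and GPU, and bails out early if the device still has transactions
 * pending.  The target link speed is then written to LNKCTL2 and a
 * software-driven speed change is kicked off via
 * LC_INITIATE_LINK_SPEED_CHANGE and polled for completion.
 */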
9319 
9320 static void cik_program_aspm(struct radeon_device *rdev)
9321 {
9322 	u32 data, orig;
9323 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9324 	bool disable_clkreq = false;
9325 
9326 	if (radeon_aspm == 0)
9327 		return;
9328 
9329 	/* XXX double check IGPs */
9330 	if (rdev->flags & RADEON_IS_IGP)
9331 		return;
9332 
9333 	if (!(rdev->flags & RADEON_IS_PCIE))
9334 		return;
9335 
9336 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9337 	data &= ~LC_XMIT_N_FTS_MASK;
9338 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9339 	if (orig != data)
9340 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9341 
9342 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9343 	data |= LC_GO_TO_RECOVERY;
9344 	if (orig != data)
9345 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9346 
9347 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9348 	data |= P_IGNORE_EDB_ERR;
9349 	if (orig != data)
9350 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9351 
9352 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9353 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9354 	data |= LC_PMI_TO_L1_DIS;
9355 	if (!disable_l0s)
9356 		data |= LC_L0S_INACTIVITY(7);
9357 
9358 	if (!disable_l1) {
9359 		data |= LC_L1_INACTIVITY(7);
9360 		data &= ~LC_PMI_TO_L1_DIS;
9361 		if (orig != data)
9362 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9363 
9364 		if (!disable_plloff_in_l1) {
9365 			bool clk_req_support;
9366 
9367 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9368 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9369 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9370 			if (orig != data)
9371 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9372 
9373 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9374 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9375 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9376 			if (orig != data)
9377 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9378 
9379 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9380 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9381 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9382 			if (orig != data)
9383 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9384 
9385 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9386 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9387 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9388 			if (orig != data)
9389 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9390 
9391 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9392 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9393 			data |= LC_DYN_LANES_PWR_STATE(3);
9394 			if (orig != data)
9395 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9396 
9397 			if (!disable_clkreq) {
9398 				struct pci_dev *root = rdev->pdev->bus->self;
9399 				u32 lnkcap;
9400 
9401 				clk_req_support = false;
9402 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9403 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9404 					clk_req_support = true;
9405 			} else {
9406 				clk_req_support = false;
9407 			}
9408 
9409 			if (clk_req_support) {
9410 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9411 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9412 				if (orig != data)
9413 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9414 
9415 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9416 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9417 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9418 				if (orig != data)
9419 					WREG32_SMC(THM_CLK_CNTL, data);
9420 
9421 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9422 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9423 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9424 				if (orig != data)
9425 					WREG32_SMC(MISC_CLK_CTRL, data);
9426 
9427 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9428 				data &= ~BCLK_AS_XCLK;
9429 				if (orig != data)
9430 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9431 
9432 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9433 				data &= ~FORCE_BIF_REFCLK_EN;
9434 				if (orig != data)
9435 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9436 
9437 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9438 				data &= ~MPLL_CLKOUT_SEL_MASK;
9439 				data |= MPLL_CLKOUT_SEL(4);
9440 				if (orig != data)
9441 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9442 			}
9443 		}
9444 	} else {
9445 		if (orig != data)
9446 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9447 	}
9448 
9449 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9450 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9451 	if (orig != data)
9452 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9453 
9454 	if (!disable_l0s) {
9455 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9456 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9457 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9458 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9459 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9460 				data &= ~LC_L0S_INACTIVITY_MASK;
9461 				if (orig != data)
9462 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9463 			}
9464 		}
9465 	}
9466 }
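
/* Note that the disable_* locals above are hard-coded to false in this
 * revision, so the L0s, L1, PLL-off-in-L1 and CLKREQ paths are always
 * taken; they read as bring-up knobs that were left in place.
 */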
9467