xref: /openbmc/linux/drivers/gpu/drm/radeon/cik.c (revision 84d517f3)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
52 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
53 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
60 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
61 MODULE_FIRMWARE("radeon/KABINI_me.bin");
62 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
63 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
64 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
65 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
66 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
67 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
68 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
69 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
70 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
71 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
72 
73 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
74 extern void r600_ih_ring_fini(struct radeon_device *rdev);
75 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
76 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
77 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
78 extern void sumo_rlc_fini(struct radeon_device *rdev);
79 extern int sumo_rlc_init(struct radeon_device *rdev);
80 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
81 extern void si_rlc_reset(struct radeon_device *rdev);
82 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
83 extern int cik_sdma_resume(struct radeon_device *rdev);
84 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
85 extern void cik_sdma_fini(struct radeon_device *rdev);
86 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
87 static void cik_rlc_stop(struct radeon_device *rdev);
88 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
89 static void cik_program_aspm(struct radeon_device *rdev);
90 static void cik_init_pg(struct radeon_device *rdev);
91 static void cik_init_cg(struct radeon_device *rdev);
92 static void cik_fini_pg(struct radeon_device *rdev);
93 static void cik_fini_cg(struct radeon_device *rdev);
94 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
95 					  bool enable);
96 
97 /* get temperature in millidegrees */
98 int ci_get_temp(struct radeon_device *rdev)
99 {
100 	u32 temp;
101 	int actual_temp = 0;
102 
103 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
104 		CTF_TEMP_SHIFT;
105 
106 	if (temp & 0x200)
107 		actual_temp = 255;
108 	else
109 		actual_temp = temp & 0x1ff;
110 
111 	actual_temp = actual_temp * 1000;
112 
113 	return actual_temp;
114 }
115 
116 /* get temperature in millidegrees */
117 int kv_get_temp(struct radeon_device *rdev)
118 {
119 	u32 temp;
120 	int actual_temp = 0;
121 
122 	temp = RREG32_SMC(0xC0300E0C);
123 
124 	if (temp)
125 		actual_temp = (temp / 8) - 49;
126 	else
127 		actual_temp = 0;
128 
129 	actual_temp = actual_temp * 1000;
130 
131 	return actual_temp;
132 }
133 
/*
 * Indirect registers accessor
 */
/**
 * cik_pciep_rreg - read a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset
 *
 * Reads @reg through the PCIE_INDEX/PCIE_DATA indirect register pair.
 * The pciep_idx_lock serializes the two-step index/data sequence;
 * IRQs are disabled so an interrupt handler cannot interleave its own
 * indexed access between the index write and the data read.
 *
 * Returns the value of the register.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	/* readback of the index register, presumably to flush the posted
	 * index write before touching the data port -- confirm */
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
149 
/**
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset
 * @v: value to write
 *
 * Writes @v to @reg through the PCIE_INDEX/PCIE_DATA indirect register
 * pair.  The pciep_idx_lock serializes the index/data sequence against
 * concurrent indexed accesses (IRQs are disabled while it is held).
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	/* readback, presumably to flush the posted index write -- confirm */
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	/* readback so the data write is posted before dropping the lock */
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
161 
/*
 * RLC save/restore register list for "spectre" parts.
 *
 * NOTE(review): layout inferred from the repeating pattern -- confirm
 * against the RLC save/restore consumer.  Most entries come in pairs:
 * an encoded register token, (select << 16) | (MMIO byte offset >> 2),
 * followed by a 0x00000000 placeholder slot.  The bare small values
 * (0x3, 0x5) break the paired pattern and presumably prefix the runs
 * of tokens that follow them.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
608 
/*
 * RLC save/restore register list for "kalindi" parts.
 *
 * NOTE(review): same inferred layout as the spectre list above --
 * confirm against the RLC save/restore consumer.  Entries are mostly
 * pairs of an encoded register token, (select << 16) | (byte offset
 * >> 2), followed by a 0x00000000 placeholder; the bare 0x3 / 0x5
 * values presumably prefix the token runs that follow them.  This
 * list covers fewer instance selects (0x4e00-0x7e00) than spectre's
 * (0x4e00-0xbe00).
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
933 
/*
 * Bonaire "golden" SPM register settings.
 * NOTE(review): entries look like {MMIO offset, AND mask, OR value}
 * triples -- presumably applied by the driver's golden-register
 * programming helper; confirm against the consumer.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
938 
/*
 * Bonaire "golden" common register settings.
 * NOTE(review): entries look like {MMIO offset, AND mask, OR value}
 * triples -- presumably applied by the driver's golden-register
 * programming helper; confirm against the consumer.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
946 
/*
 * Bonaire "golden" register settings.
 * NOTE(review): entries look like {MMIO offset, AND mask, OR value}
 * triples -- presumably applied by the driver's golden-register
 * programming helper; confirm against the consumer.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
991 
/*
 * Bonaire MGCG/CGCG (clockgating, per the name) init settings.
 * NOTE(review): entries look like {MMIO offset, AND mask, OR value}
 * triples -- presumably applied by the driver's golden-register
 * programming helper; confirm against the consumer.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1077 
/* Spectre (Kaveri) SPM golden register setting, applied by
 * cik_init_golden_registers().  Laid out as {offset, and-mask, or-value}
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1082 
/* Spectre (Kaveri) common golden register settings
 * ({offset, and-mask, or-value} triples). */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1090 
/* Spectre (Kaveri) per-asic golden register settings
 * ({offset, and-mask, or-value} triples); values come from AMD and are
 * applied verbatim by radeon_program_register_sequence(). */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1119 
/* Spectre (Kaveri) MGCG/CGCG (clock gating) init sequence
 * ({offset, and-mask, or-value} triples), applied before the golden
 * registers by cik_init_golden_registers(). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1210 
/* Kalindi (Kabini) SPM golden register setting
 * ({offset, and-mask, or-value} triples). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1215 
/* Kalindi (Kabini) common golden register settings
 * ({offset, and-mask, or-value} triples); also reused for Mullins. */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1223 
/* Kalindi (Kabini) per-asic golden register settings
 * ({offset, and-mask, or-value} triples). */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1257 
/* Kalindi (Kabini) MGCG/CGCG (clock gating) init sequence
 * ({offset, and-mask, or-value} triples); also reused for Mullins. */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1316 
/* Hawaii SPM golden register setting
 * ({offset, and-mask, or-value} triples). */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1321 
/* Hawaii common golden register settings
 * ({offset, and-mask, or-value} triples). */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1330 
/* Hawaii per-asic golden register settings
 * ({offset, and-mask, or-value} triples). */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1370 
/* Hawaii MGCG/CGCG (clock gating) init sequence
 * ({offset, and-mask, or-value} triples). */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1481 
/* Godavari (Mullins) per-asic golden register settings
 * ({offset, and-mask, or-value} triples); used together with the
 * kalindi common/spm/cg tables in cik_init_golden_registers(). */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400, /* NOTE(review): 5-digit offset 0x98302 is out of pattern (kalindi uses 0x9834 here) -- confirm against the register map */
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1517 
1518 
1519 static void cik_init_golden_registers(struct radeon_device *rdev)
1520 {
1521 	switch (rdev->family) {
1522 	case CHIP_BONAIRE:
1523 		radeon_program_register_sequence(rdev,
1524 						 bonaire_mgcg_cgcg_init,
1525 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1526 		radeon_program_register_sequence(rdev,
1527 						 bonaire_golden_registers,
1528 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1529 		radeon_program_register_sequence(rdev,
1530 						 bonaire_golden_common_registers,
1531 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1532 		radeon_program_register_sequence(rdev,
1533 						 bonaire_golden_spm_registers,
1534 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1535 		break;
1536 	case CHIP_KABINI:
1537 		radeon_program_register_sequence(rdev,
1538 						 kalindi_mgcg_cgcg_init,
1539 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1540 		radeon_program_register_sequence(rdev,
1541 						 kalindi_golden_registers,
1542 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1543 		radeon_program_register_sequence(rdev,
1544 						 kalindi_golden_common_registers,
1545 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1546 		radeon_program_register_sequence(rdev,
1547 						 kalindi_golden_spm_registers,
1548 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1549 		break;
1550 	case CHIP_MULLINS:
1551 		radeon_program_register_sequence(rdev,
1552 						 kalindi_mgcg_cgcg_init,
1553 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1554 		radeon_program_register_sequence(rdev,
1555 						 godavari_golden_registers,
1556 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1557 		radeon_program_register_sequence(rdev,
1558 						 kalindi_golden_common_registers,
1559 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1560 		radeon_program_register_sequence(rdev,
1561 						 kalindi_golden_spm_registers,
1562 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1563 		break;
1564 	case CHIP_KAVERI:
1565 		radeon_program_register_sequence(rdev,
1566 						 spectre_mgcg_cgcg_init,
1567 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1568 		radeon_program_register_sequence(rdev,
1569 						 spectre_golden_registers,
1570 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1571 		radeon_program_register_sequence(rdev,
1572 						 spectre_golden_common_registers,
1573 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1574 		radeon_program_register_sequence(rdev,
1575 						 spectre_golden_spm_registers,
1576 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1577 		break;
1578 	case CHIP_HAWAII:
1579 		radeon_program_register_sequence(rdev,
1580 						 hawaii_mgcg_cgcg_init,
1581 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1582 		radeon_program_register_sequence(rdev,
1583 						 hawaii_golden_registers,
1584 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1585 		radeon_program_register_sequence(rdev,
1586 						 hawaii_golden_common_registers,
1587 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1588 		radeon_program_register_sequence(rdev,
1589 						 hawaii_golden_spm_registers,
1590 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1591 		break;
1592 	default:
1593 		break;
1594 	}
1595 }
1596 
1597 /**
1598  * cik_get_xclk - get the xclk
1599  *
1600  * @rdev: radeon_device pointer
1601  *
1602  * Returns the reference clock used by the gfx engine
1603  * (CIK).
1604  */
1605 u32 cik_get_xclk(struct radeon_device *rdev)
1606 {
1607         u32 reference_clock = rdev->clock.spll.reference_freq;
1608 
1609 	if (rdev->flags & RADEON_IS_IGP) {
1610 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1611 			return reference_clock / 2;
1612 	} else {
1613 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1614 			return reference_clock / 4;
1615 	}
1616 	return reference_clock;
1617 }
1618 
1619 /**
1620  * cik_mm_rdoorbell - read a doorbell dword
1621  *
1622  * @rdev: radeon_device pointer
1623  * @index: doorbell index
1624  *
1625  * Returns the value in the doorbell aperture at the
1626  * requested doorbell index (CIK).
1627  */
1628 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1629 {
1630 	if (index < rdev->doorbell.num_doorbells) {
1631 		return readl(rdev->doorbell.ptr + index);
1632 	} else {
1633 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1634 		return 0;
1635 	}
1636 }
1637 
1638 /**
1639  * cik_mm_wdoorbell - write a doorbell dword
1640  *
1641  * @rdev: radeon_device pointer
1642  * @index: doorbell index
1643  * @v: value to write
1644  *
1645  * Writes @v to the doorbell aperture at the
1646  * requested doorbell index (CIK).
1647  */
1648 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1649 {
1650 	if (index < rdev->doorbell.num_doorbells) {
1651 		writel(v, rdev->doorbell.ptr + index);
1652 	} else {
1653 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1654 	}
1655 }
1656 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io register init: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs programmed by ci_mc_load_microcode() before uploading the MC ucode. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1698 
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC io register init: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs programmed by ci_mc_load_microcode() before uploading the MC ucode. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1726 
1727 
1728 /**
1729  * cik_srbm_select - select specific register instances
1730  *
1731  * @rdev: radeon_device pointer
1732  * @me: selected ME (micro engine)
1733  * @pipe: pipe
1734  * @queue: queue
1735  * @vmid: VMID
1736  *
1737  * Switches the currently active registers instances.  Some
1738  * registers are instanced per VMID, others are instanced per
1739  * me/pipe/queue combination.
1740  */
1741 static void cik_srbm_select(struct radeon_device *rdev,
1742 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1743 {
1744 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1745 			     MEID(me & 0x3) |
1746 			     VMID(vmid & 0xf) |
1747 			     QUEUEID(queue & 0x7));
1748 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1749 }
1750 
1751 /* ucode loading */
1752 /**
1753  * ci_mc_load_microcode - load MC ucode into the hw
1754  *
1755  * @rdev: radeon_device pointer
1756  *
1757  * Load the GDDR MC ucode into the hw (CIK).
1758  * Returns 0 on success, error on failure.
1759  */
1760 int ci_mc_load_microcode(struct radeon_device *rdev)
1761 {
1762 	const __be32 *fw_data;
1763 	u32 running, blackout = 0;
1764 	u32 *io_mc_regs;
1765 	int i, regs_size, ucode_size;
1766 
1767 	if (!rdev->mc_fw)
1768 		return -EINVAL;
1769 
1770 	ucode_size = rdev->mc_fw->size / 4;
1771 
1772 	switch (rdev->family) {
1773 	case CHIP_BONAIRE:
1774 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1775 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1776 		break;
1777 	case CHIP_HAWAII:
1778 		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1779 		regs_size = HAWAII_IO_MC_REGS_SIZE;
1780 		break;
1781 	default:
1782 		return -EINVAL;
1783 	}
1784 
1785 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1786 
1787 	if (running == 0) {
1788 		if (running) {
1789 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1790 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1791 		}
1792 
1793 		/* reset the engine and set to writable */
1794 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1795 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1796 
1797 		/* load mc io regs */
1798 		for (i = 0; i < regs_size; i++) {
1799 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1800 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1801 		}
1802 		/* load the MC ucode */
1803 		fw_data = (const __be32 *)rdev->mc_fw->data;
1804 		for (i = 0; i < ucode_size; i++)
1805 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1806 
1807 		/* put the engine back into the active state */
1808 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1809 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1810 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1811 
1812 		/* wait for training to complete */
1813 		for (i = 0; i < rdev->usec_timeout; i++) {
1814 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1815 				break;
1816 			udelay(1);
1817 		}
1818 		for (i = 0; i < rdev->usec_timeout; i++) {
1819 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1820 				break;
1821 			udelay(1);
1822 		}
1823 
1824 		if (running)
1825 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1826 	}
1827 
1828 	return 0;
1829 }
1830 
1831 /**
1832  * cik_init_microcode - load ucode images from disk
1833  *
1834  * @rdev: radeon_device pointer
1835  *
1836  * Use the firmware interface to load the ucode images into
1837  * the driver (not loaded into hw).
1838  * Returns 0 on success, error on failure.
1839  */
1840 static int cik_init_microcode(struct radeon_device *rdev)
1841 {
1842 	const char *chip_name;
1843 	size_t pfp_req_size, me_req_size, ce_req_size,
1844 		mec_req_size, rlc_req_size, mc_req_size = 0,
1845 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1846 	char fw_name[30];
1847 	int err;
1848 
1849 	DRM_DEBUG("\n");
1850 
1851 	switch (rdev->family) {
1852 	case CHIP_BONAIRE:
1853 		chip_name = "BONAIRE";
1854 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1855 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1856 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1857 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1858 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1859 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1860 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1861 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1862 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1863 		break;
1864 	case CHIP_HAWAII:
1865 		chip_name = "HAWAII";
1866 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1867 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1868 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1869 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1870 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1871 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1872 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1873 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1874 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1875 		break;
1876 	case CHIP_KAVERI:
1877 		chip_name = "KAVERI";
1878 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1879 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1880 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1881 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1882 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1883 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1884 		break;
1885 	case CHIP_KABINI:
1886 		chip_name = "KABINI";
1887 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1888 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1889 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1890 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1891 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1892 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1893 		break;
1894 	case CHIP_MULLINS:
1895 		chip_name = "MULLINS";
1896 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1897 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1898 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1899 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1900 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1901 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1902 		break;
1903 	default: BUG();
1904 	}
1905 
1906 	DRM_INFO("Loading %s Microcode\n", chip_name);
1907 
1908 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1909 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1910 	if (err)
1911 		goto out;
1912 	if (rdev->pfp_fw->size != pfp_req_size) {
1913 		printk(KERN_ERR
1914 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1915 		       rdev->pfp_fw->size, fw_name);
1916 		err = -EINVAL;
1917 		goto out;
1918 	}
1919 
1920 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1921 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1922 	if (err)
1923 		goto out;
1924 	if (rdev->me_fw->size != me_req_size) {
1925 		printk(KERN_ERR
1926 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1927 		       rdev->me_fw->size, fw_name);
1928 		err = -EINVAL;
1929 	}
1930 
1931 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1932 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1933 	if (err)
1934 		goto out;
1935 	if (rdev->ce_fw->size != ce_req_size) {
1936 		printk(KERN_ERR
1937 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1938 		       rdev->ce_fw->size, fw_name);
1939 		err = -EINVAL;
1940 	}
1941 
1942 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1943 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1944 	if (err)
1945 		goto out;
1946 	if (rdev->mec_fw->size != mec_req_size) {
1947 		printk(KERN_ERR
1948 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1949 		       rdev->mec_fw->size, fw_name);
1950 		err = -EINVAL;
1951 	}
1952 
1953 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1954 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1955 	if (err)
1956 		goto out;
1957 	if (rdev->rlc_fw->size != rlc_req_size) {
1958 		printk(KERN_ERR
1959 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1960 		       rdev->rlc_fw->size, fw_name);
1961 		err = -EINVAL;
1962 	}
1963 
1964 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1965 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1966 	if (err)
1967 		goto out;
1968 	if (rdev->sdma_fw->size != sdma_req_size) {
1969 		printk(KERN_ERR
1970 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1971 		       rdev->sdma_fw->size, fw_name);
1972 		err = -EINVAL;
1973 	}
1974 
1975 	/* No SMC, MC ucode on APUs */
1976 	if (!(rdev->flags & RADEON_IS_IGP)) {
1977 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1978 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1979 		if (err) {
1980 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1981 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1982 			if (err)
1983 				goto out;
1984 		}
1985 		if ((rdev->mc_fw->size != mc_req_size) &&
1986 		    (rdev->mc_fw->size != mc2_req_size)){
1987 			printk(KERN_ERR
1988 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1989 			       rdev->mc_fw->size, fw_name);
1990 			err = -EINVAL;
1991 		}
1992 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1993 
1994 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1995 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1996 		if (err) {
1997 			printk(KERN_ERR
1998 			       "smc: error loading firmware \"%s\"\n",
1999 			       fw_name);
2000 			release_firmware(rdev->smc_fw);
2001 			rdev->smc_fw = NULL;
2002 			err = 0;
2003 		} else if (rdev->smc_fw->size != smc_req_size) {
2004 			printk(KERN_ERR
2005 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2006 			       rdev->smc_fw->size, fw_name);
2007 			err = -EINVAL;
2008 		}
2009 	}
2010 
2011 out:
2012 	if (err) {
2013 		if (err != -EINVAL)
2014 			printk(KERN_ERR
2015 			       "cik_cp: Failed to load firmware \"%s\"\n",
2016 			       fw_name);
2017 		release_firmware(rdev->pfp_fw);
2018 		rdev->pfp_fw = NULL;
2019 		release_firmware(rdev->me_fw);
2020 		rdev->me_fw = NULL;
2021 		release_firmware(rdev->ce_fw);
2022 		rdev->ce_fw = NULL;
2023 		release_firmware(rdev->rlc_fw);
2024 		rdev->rlc_fw = NULL;
2025 		release_firmware(rdev->mc_fw);
2026 		rdev->mc_fw = NULL;
2027 		release_firmware(rdev->smc_fw);
2028 		rdev->smc_fw = NULL;
2029 	}
2030 	return err;
2031 }
2032 
2033 /*
2034  * Core functions
2035  */
2036 /**
2037  * cik_tiling_mode_table_init - init the hw tiling table
2038  *
2039  * @rdev: radeon_device pointer
2040  *
2041  * Starting with SI, the tiling setup is done globally in a
2042  * set of 32 tiling modes.  Rather than selecting each set of
2043  * parameters per surface as on older asics, we just select
2044  * which index in the tiling table we want to use, and the
2045  * surface uses those parameters (CIK).
2046  */
2047 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2048 {
2049 	const u32 num_tile_mode_states = 32;
2050 	const u32 num_secondary_tile_mode_states = 16;
2051 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2052 	u32 num_pipe_configs;
2053 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2054 		rdev->config.cik.max_shader_engines;
2055 
2056 	switch (rdev->config.cik.mem_row_size_in_kb) {
2057 	case 1:
2058 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2059 		break;
2060 	case 2:
2061 	default:
2062 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2063 		break;
2064 	case 4:
2065 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2066 		break;
2067 	}
2068 
2069 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2070 	if (num_pipe_configs > 8)
2071 		num_pipe_configs = 16;
2072 
2073 	if (num_pipe_configs == 16) {
2074 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2075 			switch (reg_offset) {
2076 			case 0:
2077 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2079 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2080 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2081 				break;
2082 			case 1:
2083 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2084 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2085 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2086 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2087 				break;
2088 			case 2:
2089 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2090 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2091 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2092 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2093 				break;
2094 			case 3:
2095 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2096 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2097 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2098 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2099 				break;
2100 			case 4:
2101 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2102 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2103 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2104 						 TILE_SPLIT(split_equal_to_row_size));
2105 				break;
2106 			case 5:
2107 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2108 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2109 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2110 				break;
2111 			case 6:
2112 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2113 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2114 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2115 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2116 				break;
2117 			case 7:
2118 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2119 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2120 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2121 						 TILE_SPLIT(split_equal_to_row_size));
2122 				break;
2123 			case 8:
2124 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2125 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2126 				break;
2127 			case 9:
2128 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2129 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2130 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2131 				break;
2132 			case 10:
2133 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2134 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2135 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2136 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2137 				break;
2138 			case 11:
2139 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2140 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2141 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2142 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2143 				break;
2144 			case 12:
2145 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2146 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2147 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2148 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149 				break;
2150 			case 13:
2151 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2152 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2153 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2154 				break;
2155 			case 14:
2156 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2158 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2159 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2160 				break;
2161 			case 16:
2162 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2163 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2164 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2165 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2166 				break;
2167 			case 17:
2168 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2169 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2170 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2171 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172 				break;
2173 			case 27:
2174 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2175 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2176 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2177 				break;
2178 			case 28:
2179 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2180 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2181 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2182 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2183 				break;
2184 			case 29:
2185 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2186 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2187 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2188 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189 				break;
2190 			case 30:
2191 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2192 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2193 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2194 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2195 				break;
2196 			default:
2197 				gb_tile_moden = 0;
2198 				break;
2199 			}
2200 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2201 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2202 		}
2203 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2204 			switch (reg_offset) {
2205 			case 0:
2206 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2208 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2209 						 NUM_BANKS(ADDR_SURF_16_BANK));
2210 				break;
2211 			case 1:
2212 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2213 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2214 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2215 						 NUM_BANKS(ADDR_SURF_16_BANK));
2216 				break;
2217 			case 2:
2218 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2219 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2220 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2221 						 NUM_BANKS(ADDR_SURF_16_BANK));
2222 				break;
2223 			case 3:
2224 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2225 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2226 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2227 						 NUM_BANKS(ADDR_SURF_16_BANK));
2228 				break;
2229 			case 4:
2230 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2231 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2232 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2233 						 NUM_BANKS(ADDR_SURF_8_BANK));
2234 				break;
2235 			case 5:
2236 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2237 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2238 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2239 						 NUM_BANKS(ADDR_SURF_4_BANK));
2240 				break;
2241 			case 6:
2242 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2244 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2245 						 NUM_BANKS(ADDR_SURF_2_BANK));
2246 				break;
2247 			case 8:
2248 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2250 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2251 						 NUM_BANKS(ADDR_SURF_16_BANK));
2252 				break;
2253 			case 9:
2254 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2256 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257 						 NUM_BANKS(ADDR_SURF_16_BANK));
2258 				break;
2259 			case 10:
2260 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2262 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2263 						 NUM_BANKS(ADDR_SURF_16_BANK));
2264 				break;
2265 			case 11:
2266 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2268 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2269 						 NUM_BANKS(ADDR_SURF_8_BANK));
2270 				break;
2271 			case 12:
2272 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2274 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2275 						 NUM_BANKS(ADDR_SURF_4_BANK));
2276 				break;
2277 			case 13:
2278 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2280 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2281 						 NUM_BANKS(ADDR_SURF_2_BANK));
2282 				break;
2283 			case 14:
2284 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2286 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2287 						 NUM_BANKS(ADDR_SURF_2_BANK));
2288 				break;
2289 			default:
2290 				gb_tile_moden = 0;
2291 				break;
2292 			}
2293 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2294 		}
2295 	} else if (num_pipe_configs == 8) {
2296 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2297 			switch (reg_offset) {
2298 			case 0:
2299 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2300 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2301 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2302 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2303 				break;
2304 			case 1:
2305 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2307 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2308 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2309 				break;
2310 			case 2:
2311 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2313 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2314 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2315 				break;
2316 			case 3:
2317 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2319 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2320 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2321 				break;
2322 			case 4:
2323 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2325 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2326 						 TILE_SPLIT(split_equal_to_row_size));
2327 				break;
2328 			case 5:
2329 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2330 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2332 				break;
2333 			case 6:
2334 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2335 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2336 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2337 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2338 				break;
2339 			case 7:
2340 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2341 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2342 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2343 						 TILE_SPLIT(split_equal_to_row_size));
2344 				break;
2345 			case 8:
2346 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2347 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2348 				break;
2349 			case 9:
2350 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2351 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2352 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2353 				break;
2354 			case 10:
2355 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2357 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2358 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359 				break;
2360 			case 11:
2361 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2364 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365 				break;
2366 			case 12:
2367 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2368 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2369 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2370 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2371 				break;
2372 			case 13:
2373 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2375 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2376 				break;
2377 			case 14:
2378 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2381 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382 				break;
2383 			case 16:
2384 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2385 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2386 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2387 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388 				break;
2389 			case 17:
2390 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2391 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2393 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2394 				break;
2395 			case 27:
2396 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2397 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2398 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2399 				break;
2400 			case 28:
2401 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2404 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 				break;
2406 			case 29:
2407 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2408 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2409 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2410 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411 				break;
2412 			case 30:
2413 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2414 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2416 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 				break;
2418 			default:
2419 				gb_tile_moden = 0;
2420 				break;
2421 			}
2422 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2423 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2424 		}
2425 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2426 			switch (reg_offset) {
2427 			case 0:
2428 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2430 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2431 						 NUM_BANKS(ADDR_SURF_16_BANK));
2432 				break;
2433 			case 1:
2434 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2436 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2437 						 NUM_BANKS(ADDR_SURF_16_BANK));
2438 				break;
2439 			case 2:
2440 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2442 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443 						 NUM_BANKS(ADDR_SURF_16_BANK));
2444 				break;
2445 			case 3:
2446 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2448 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449 						 NUM_BANKS(ADDR_SURF_16_BANK));
2450 				break;
2451 			case 4:
2452 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455 						 NUM_BANKS(ADDR_SURF_8_BANK));
2456 				break;
2457 			case 5:
2458 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461 						 NUM_BANKS(ADDR_SURF_4_BANK));
2462 				break;
2463 			case 6:
2464 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467 						 NUM_BANKS(ADDR_SURF_2_BANK));
2468 				break;
2469 			case 8:
2470 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2472 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2473 						 NUM_BANKS(ADDR_SURF_16_BANK));
2474 				break;
2475 			case 9:
2476 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2479 						 NUM_BANKS(ADDR_SURF_16_BANK));
2480 				break;
2481 			case 10:
2482 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2485 						 NUM_BANKS(ADDR_SURF_16_BANK));
2486 				break;
2487 			case 11:
2488 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491 						 NUM_BANKS(ADDR_SURF_16_BANK));
2492 				break;
2493 			case 12:
2494 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2497 						 NUM_BANKS(ADDR_SURF_8_BANK));
2498 				break;
2499 			case 13:
2500 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503 						 NUM_BANKS(ADDR_SURF_4_BANK));
2504 				break;
2505 			case 14:
2506 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2508 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2509 						 NUM_BANKS(ADDR_SURF_2_BANK));
2510 				break;
2511 			default:
2512 				gb_tile_moden = 0;
2513 				break;
2514 			}
2515 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2516 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2517 		}
2518 	} else if (num_pipe_configs == 4) {
2519 		if (num_rbs == 4) {
2520 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2521 				switch (reg_offset) {
2522 				case 0:
2523 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2526 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2527 					break;
2528 				case 1:
2529 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2532 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2533 					break;
2534 				case 2:
2535 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2536 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2537 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2538 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2539 					break;
2540 				case 3:
2541 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2544 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2545 					break;
2546 				case 4:
2547 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2548 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2549 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2550 							 TILE_SPLIT(split_equal_to_row_size));
2551 					break;
2552 				case 5:
2553 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2554 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2555 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2556 					break;
2557 				case 6:
2558 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2559 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2560 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2561 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2562 					break;
2563 				case 7:
2564 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2565 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2567 							 TILE_SPLIT(split_equal_to_row_size));
2568 					break;
2569 				case 8:
2570 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2571 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2572 					break;
2573 				case 9:
2574 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2575 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2576 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2577 					break;
2578 				case 10:
2579 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2581 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2582 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 					break;
2584 				case 11:
2585 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2586 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2587 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2588 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2589 					break;
2590 				case 12:
2591 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2592 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2593 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2594 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2595 					break;
2596 				case 13:
2597 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2598 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2600 					break;
2601 				case 14:
2602 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2604 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606 					break;
2607 				case 16:
2608 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2609 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2610 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2611 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2612 					break;
2613 				case 17:
2614 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2615 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2616 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2617 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618 					break;
2619 				case 27:
2620 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2621 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2623 					break;
2624 				case 28:
2625 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2626 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2627 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2628 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629 					break;
2630 				case 29:
2631 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2633 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2635 					break;
2636 				case 30:
2637 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2638 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2639 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2640 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641 					break;
2642 				default:
2643 					gb_tile_moden = 0;
2644 					break;
2645 				}
2646 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2647 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2648 			}
2649 		} else if (num_rbs < 4) {
2650 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2651 				switch (reg_offset) {
2652 				case 0:
2653 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2655 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2656 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2657 					break;
2658 				case 1:
2659 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2661 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2662 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2663 					break;
2664 				case 2:
2665 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2668 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669 					break;
2670 				case 3:
2671 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2675 					break;
2676 				case 4:
2677 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2679 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2680 							 TILE_SPLIT(split_equal_to_row_size));
2681 					break;
2682 				case 5:
2683 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2684 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2685 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2686 					break;
2687 				case 6:
2688 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2691 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2692 					break;
2693 				case 7:
2694 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2695 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2696 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2697 							 TILE_SPLIT(split_equal_to_row_size));
2698 					break;
2699 				case 8:
2700 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2701 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2702 					break;
2703 				case 9:
2704 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2706 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2707 					break;
2708 				case 10:
2709 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2711 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2712 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2713 					break;
2714 				case 11:
2715 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2717 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719 					break;
2720 				case 12:
2721 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2722 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2723 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2725 					break;
2726 				case 13:
2727 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2728 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2730 					break;
2731 				case 14:
2732 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736 					break;
2737 				case 16:
2738 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2740 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2741 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742 					break;
2743 				case 17:
2744 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2745 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2748 					break;
2749 				case 27:
2750 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2751 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2753 					break;
2754 				case 28:
2755 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2756 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2757 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759 					break;
2760 				case 29:
2761 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2762 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2763 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2764 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2765 					break;
2766 				case 30:
2767 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2768 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2769 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2771 					break;
2772 				default:
2773 					gb_tile_moden = 0;
2774 					break;
2775 				}
2776 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2777 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2778 			}
2779 		}
2780 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2781 			switch (reg_offset) {
2782 			case 0:
2783 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2785 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2786 						 NUM_BANKS(ADDR_SURF_16_BANK));
2787 				break;
2788 			case 1:
2789 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2791 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2792 						 NUM_BANKS(ADDR_SURF_16_BANK));
2793 				break;
2794 			case 2:
2795 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2797 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2798 						 NUM_BANKS(ADDR_SURF_16_BANK));
2799 				break;
2800 			case 3:
2801 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2803 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2804 						 NUM_BANKS(ADDR_SURF_16_BANK));
2805 				break;
2806 			case 4:
2807 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2809 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2810 						 NUM_BANKS(ADDR_SURF_16_BANK));
2811 				break;
2812 			case 5:
2813 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2815 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2816 						 NUM_BANKS(ADDR_SURF_8_BANK));
2817 				break;
2818 			case 6:
2819 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2821 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2822 						 NUM_BANKS(ADDR_SURF_4_BANK));
2823 				break;
2824 			case 8:
2825 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2826 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2827 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2828 						 NUM_BANKS(ADDR_SURF_16_BANK));
2829 				break;
2830 			case 9:
2831 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2832 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2833 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834 						 NUM_BANKS(ADDR_SURF_16_BANK));
2835 				break;
2836 			case 10:
2837 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2839 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840 						 NUM_BANKS(ADDR_SURF_16_BANK));
2841 				break;
2842 			case 11:
2843 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2845 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846 						 NUM_BANKS(ADDR_SURF_16_BANK));
2847 				break;
2848 			case 12:
2849 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852 						 NUM_BANKS(ADDR_SURF_16_BANK));
2853 				break;
2854 			case 13:
2855 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2857 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2858 						 NUM_BANKS(ADDR_SURF_8_BANK));
2859 				break;
2860 			case 14:
2861 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2864 						 NUM_BANKS(ADDR_SURF_4_BANK));
2865 				break;
2866 			default:
2867 				gb_tile_moden = 0;
2868 				break;
2869 			}
2870 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2871 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2872 		}
2873 	} else if (num_pipe_configs == 2) {
2874 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2875 			switch (reg_offset) {
2876 			case 0:
2877 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2879 						 PIPE_CONFIG(ADDR_SURF_P2) |
2880 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2881 				break;
2882 			case 1:
2883 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2885 						 PIPE_CONFIG(ADDR_SURF_P2) |
2886 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2887 				break;
2888 			case 2:
2889 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891 						 PIPE_CONFIG(ADDR_SURF_P2) |
2892 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893 				break;
2894 			case 3:
2895 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897 						 PIPE_CONFIG(ADDR_SURF_P2) |
2898 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2899 				break;
2900 			case 4:
2901 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2903 						 PIPE_CONFIG(ADDR_SURF_P2) |
2904 						 TILE_SPLIT(split_equal_to_row_size));
2905 				break;
2906 			case 5:
2907 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908 						 PIPE_CONFIG(ADDR_SURF_P2) |
2909 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2910 				break;
2911 			case 6:
2912 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914 						 PIPE_CONFIG(ADDR_SURF_P2) |
2915 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2916 				break;
2917 			case 7:
2918 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2919 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2920 						 PIPE_CONFIG(ADDR_SURF_P2) |
2921 						 TILE_SPLIT(split_equal_to_row_size));
2922 				break;
2923 			case 8:
2924 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2925 						PIPE_CONFIG(ADDR_SURF_P2);
2926 				break;
2927 			case 9:
2928 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2929 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2930 						 PIPE_CONFIG(ADDR_SURF_P2));
2931 				break;
2932 			case 10:
2933 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2935 						 PIPE_CONFIG(ADDR_SURF_P2) |
2936 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2937 				break;
2938 			case 11:
2939 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2940 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941 						 PIPE_CONFIG(ADDR_SURF_P2) |
2942 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943 				break;
2944 			case 12:
2945 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2946 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947 						 PIPE_CONFIG(ADDR_SURF_P2) |
2948 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949 				break;
2950 			case 13:
2951 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2952 						 PIPE_CONFIG(ADDR_SURF_P2) |
2953 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2954 				break;
2955 			case 14:
2956 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2957 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958 						 PIPE_CONFIG(ADDR_SURF_P2) |
2959 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960 				break;
2961 			case 16:
2962 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964 						 PIPE_CONFIG(ADDR_SURF_P2) |
2965 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966 				break;
2967 			case 17:
2968 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2969 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970 						 PIPE_CONFIG(ADDR_SURF_P2) |
2971 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2972 				break;
2973 			case 27:
2974 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2975 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976 						 PIPE_CONFIG(ADDR_SURF_P2));
2977 				break;
2978 			case 28:
2979 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2980 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2981 						 PIPE_CONFIG(ADDR_SURF_P2) |
2982 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983 				break;
2984 			case 29:
2985 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2986 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2987 						 PIPE_CONFIG(ADDR_SURF_P2) |
2988 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2989 				break;
2990 			case 30:
2991 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2992 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2993 						 PIPE_CONFIG(ADDR_SURF_P2) |
2994 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2995 				break;
2996 			default:
2997 				gb_tile_moden = 0;
2998 				break;
2999 			}
3000 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3001 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3002 		}
3003 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3004 			switch (reg_offset) {
3005 			case 0:
3006 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3007 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3008 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3009 						 NUM_BANKS(ADDR_SURF_16_BANK));
3010 				break;
3011 			case 1:
3012 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3013 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3014 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3015 						 NUM_BANKS(ADDR_SURF_16_BANK));
3016 				break;
3017 			case 2:
3018 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3020 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3021 						 NUM_BANKS(ADDR_SURF_16_BANK));
3022 				break;
3023 			case 3:
3024 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 						 NUM_BANKS(ADDR_SURF_16_BANK));
3028 				break;
3029 			case 4:
3030 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3032 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3033 						 NUM_BANKS(ADDR_SURF_16_BANK));
3034 				break;
3035 			case 5:
3036 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039 						 NUM_BANKS(ADDR_SURF_16_BANK));
3040 				break;
3041 			case 6:
3042 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3043 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3044 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3045 						 NUM_BANKS(ADDR_SURF_8_BANK));
3046 				break;
3047 			case 8:
3048 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3049 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3050 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3051 						 NUM_BANKS(ADDR_SURF_16_BANK));
3052 				break;
3053 			case 9:
3054 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3055 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3056 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3057 						 NUM_BANKS(ADDR_SURF_16_BANK));
3058 				break;
3059 			case 10:
3060 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3061 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3062 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3063 						 NUM_BANKS(ADDR_SURF_16_BANK));
3064 				break;
3065 			case 11:
3066 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3067 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3068 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3069 						 NUM_BANKS(ADDR_SURF_16_BANK));
3070 				break;
3071 			case 12:
3072 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3073 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3074 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3075 						 NUM_BANKS(ADDR_SURF_16_BANK));
3076 				break;
3077 			case 13:
3078 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3081 						 NUM_BANKS(ADDR_SURF_16_BANK));
3082 				break;
3083 			case 14:
3084 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3087 						 NUM_BANKS(ADDR_SURF_8_BANK));
3088 				break;
3089 			default:
3090 				gb_tile_moden = 0;
3091 				break;
3092 			}
3093 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3094 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3095 		}
3096 	} else
3097 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3098 }
3099 
3100 /**
3101  * cik_select_se_sh - select which SE, SH to address
3102  *
3103  * @rdev: radeon_device pointer
3104  * @se_num: shader engine to address
3105  * @sh_num: sh block to address
3106  *
3107  * Select which SE, SH combinations to address. Certain
3108  * registers are instanced per SE or SH.  0xffffffff means
3109  * broadcast to all SEs or SHs (CIK).
3110  */
3111 static void cik_select_se_sh(struct radeon_device *rdev,
3112 			     u32 se_num, u32 sh_num)
3113 {
3114 	u32 data = INSTANCE_BROADCAST_WRITES;
3115 
3116 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3117 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3118 	else if (se_num == 0xffffffff)
3119 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3120 	else if (sh_num == 0xffffffff)
3121 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3122 	else
3123 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3124 	WREG32(GRBM_GFX_INDEX, data);
3125 }
3126 
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a variable length bit mask (CIK).
 * Returns a mask with the low @bit_width bits set; saturates to
 * all-ones for widths of 32 or more (same result the previous
 * shift-accumulate loop produced).
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* Shifting a 32-bit value by >= 32 is undefined behavior in C,
	 * so saturate explicitly instead of using the closed form. */
	if (bit_width >= 32)
		return 0xffffffff;
	return (1u << bit_width) - 1;
}
3145 
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine)
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs for the currently selected
 * SE/SH (see cik_select_se_sh) (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 data, mask;

	/* Fuse-level backend disables; only honoured when bit 0 is set —
	 * presumably a "disable valid" flag. TODO(review): confirm against
	 * the CIK register spec. */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* fold in driver/user requested backend disables */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* one bit per RB belonging to this SH */
	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
3176 
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather the disabled-RB bits of every SE/SH into one flat mask;
	 * Hawaii packs more bits per SH than the other CIK parts */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a set bit in enabled_rbs means that RB is usable */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* program the raster config of each SE from the surviving RBs,
	 * consuming two enabled_rbs bits per SH */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* back to broadcast so subsequent register writes hit all SE/SH */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3245 
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-asic shader engine / pipe / backend limits and the
	 * "golden" GB_ADDR_CONFIG value */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* different Kaveri SKUs (by PCI device id) expose
		 * different CU and render-backend counts */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	/* enable framebuffer reads/writes through the BIF */
	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but never used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* propagate the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	/* NOTE(review): SQ_CONFIG was set to 1 above and is cleared here;
	 * presumably intentional init ordering — confirm against the
	 * programming guide before changing. */
	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write to keep the reset value of the other bits */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the configuration settle before continuing */
	udelay(50);
}
3510 
3511 /*
3512  * GPU scratch registers helpers function.
3513  */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	/* 7 consecutive 4-byte scratch registers starting at SCRATCH_REG0,
	 * all initially marked unallocated */
	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}
3535 
/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg so we can tell when the CP has written it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* 3-dword packet asking the CP to write 0xDEADBEEF to the
	 * scratch register via SET_UCONFIG_REG */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	/* poll (up to usec_timeout microseconds) for the write to land */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3587 
3588 /**
3589  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3590  *
3591  * @rdev: radeon_device pointer
3592  * @ridx: radeon ring index
3593  *
3594  * Emits an hdp flush on the cp.
3595  */
3596 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3597 				       int ridx)
3598 {
3599 	struct radeon_ring *ring = &rdev->ring[ridx];
3600 	u32 ref_and_mask;
3601 
3602 	switch (ring->idx) {
3603 	case CAYMAN_RING_TYPE_CP1_INDEX:
3604 	case CAYMAN_RING_TYPE_CP2_INDEX:
3605 	default:
3606 		switch (ring->me) {
3607 		case 0:
3608 			ref_and_mask = CP2 << ring->pipe;
3609 			break;
3610 		case 1:
3611 			ref_and_mask = CP6 << ring->pipe;
3612 			break;
3613 		default:
3614 			return;
3615 		}
3616 		break;
3617 	case RADEON_RING_TYPE_GFX_INDEX:
3618 		ref_and_mask = CP0;
3619 		break;
3620 	}
3621 
3622 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3623 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3624 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3625 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3626 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3627 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3628 	radeon_ring_write(ring, ref_and_mask);
3629 	radeon_ring_write(ring, ref_and_mask);
3630 	radeon_ring_write(ring, 0x20); /* poll interval */
3631 }
3632 
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* fence address is 4-byte aligned; the low bits are dropped */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
}
3661 
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.  Unlike the gfx path this uses RELEASE_MEM, which
 * takes the sel dword before the address pair.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* address where the fence driver expects the seq to land */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL selects writing fence->seq, INT_SEL requests the interrupt */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
}
3691 
3692 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3693 			     struct radeon_ring *ring,
3694 			     struct radeon_semaphore *semaphore,
3695 			     bool emit_wait)
3696 {
3697 	uint64_t addr = semaphore->gpu_addr;
3698 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3699 
3700 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3701 	radeon_ring_write(ring, addr & 0xffffffff);
3702 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3703 
3704 	return true;
3705 }
3706 
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object (in: fence to sync to, out: new fence)
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 * Returns 0 on success, negative error code on failure.
 */
int cik_copy_cpdma(struct radeon_device *rdev,
		   uint64_t src_offset, uint64_t dst_offset,
		   unsigned num_gpu_pages,
		   struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	/* each DMA_DATA packet can move at most 0x1fffff bytes */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus headroom for sync + fence emit */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	/* NOTE(review): the return value of radeon_semaphore_sync_rings()
	 * is ignored here, so a failed cross-ring sync is not propagated —
	 * confirm this is intentional */
	radeon_semaphore_sync_to(sem, *fence);
	radeon_semaphore_sync_rings(rdev, sem, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* make the CP wait for completion on the final chunk */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}
3781 
3782 /*
3783  * IB stuff
3784  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* record where the rptr will be after this IB so the
		 * ring tracker can skip over it on reset/recovery */
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length plus the VM id the IB should execute under (0 = kernel) */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3840 
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * The IB writes 0xDEADBEEF to a scratch register; the test then
 * polls the register to verify the write landed.
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a sentinel so a no-op IB is detected */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* three-dword IB: SET_UCONFIG_REG scratch = 0xDEADBEEF */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	/* the fence has signaled, but give the write a grace period */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3906 
3907 /*
3908  * CP.
 * On CIK, gfx and compute now have independent command processors.
3910  *
3911  * GFX
3912  * Gfx consists of a single ring and can process both gfx jobs and
3913  * compute jobs.  The gfx CP consists of three microengines (ME):
3914  * PFP - Pre-Fetch Parser
3915  * ME - Micro Engine
3916  * CE - Constant Engine
3917  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3919  * used by the DE so that they can be loaded into cache in parallel
3920  * while the DE is processing state update packets.
3921  *
3922  * Compute
3923  * The compute CP consists of two microengines (ME):
3924  * MEC1 - Compute MicroEngine 1
3925  * MEC2 - Compute MicroEngine 2
3926  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3927  * The queues are exposed to userspace and are programmed directly
3928  * by the compute runtime.
3929  */
3930 /**
3931  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3932  *
3933  * @rdev: radeon_device pointer
3934  * @enable: enable or disable the MEs
3935  *
3936  * Halts or unhalts the gfx MEs.
3937  */
3938 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3939 {
3940 	if (enable)
3941 		WREG32(CP_ME_CNTL, 0);
3942 	else {
3943 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3944 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3945 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3946 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3947 	}
3948 	udelay(50);
3949 }
3950 
/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.  The MEs are halted first,
 * then each ucode image is streamed into its RAM with the write
 * address reset before and after the transfer.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* MEs must be halted while their ucode RAM is written */
	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* leave all ucode address registers zeroed */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
3996 
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* reserve room for the default state plus 17 dwords of packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden register state generated from clearstate_ci.h */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
4057 
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Halts the gfx MEs, then tears down the driver-side ring
 * bookkeeping for the gfx ring.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4071 
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	/* register not present on Hawaii */
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base address is in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* re-expose full VRAM to TTM now that the copy ring is alive */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4147 
4148 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4149 		     struct radeon_ring *ring)
4150 {
4151 	u32 rptr;
4152 
4153 	if (rdev->wb.enabled)
4154 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4155 	else
4156 		rptr = RREG32(CP_RB0_RPTR);
4157 
4158 	return rptr;
4159 }
4160 
4161 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4162 		     struct radeon_ring *ring)
4163 {
4164 	u32 wptr;
4165 
4166 	wptr = RREG32(CP_RB0_WPTR);
4167 
4168 	return wptr;
4169 }
4170 
/* Commit the gfx ring write pointer to the hardware.  The readback
 * after the write flushes the posted register write. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	(void)RREG32(CP_RB0_WPTR);
}
4177 
/* Fetch the read pointer for a compute ring.  Uses the writeback copy
 * when available; otherwise selects the ring's HQD via SRBM (under
 * srbm_mutex, since the select is global) and reads the register. */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		/* restore default SRBM selection before dropping the lock */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4195 
/* Fetch the write pointer for a compute ring.  Uses the writeback copy
 * when available; otherwise selects the ring's HQD via SRBM (under
 * srbm_mutex) and reads CP_HQD_PQ_WPTR. */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		/* restore default SRBM selection before dropping the lock */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4214 
/* Commit a compute ring write pointer: mirror it into the writeback
 * slot, then ring the queue's doorbell to notify the CP. */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4222 
4223 /**
4224  * cik_cp_compute_enable - enable/disable the compute CP MEs
4225  *
4226  * @rdev: radeon_device pointer
4227  * @enable: enable or disable the MEs
4228  *
4229  * Halts or unhalts the compute MEs.
4230  */
4231 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4232 {
4233 	if (enable)
4234 		WREG32(CP_MEC_CNTL, 0);
4235 	else {
4236 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4237 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4238 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4239 	}
4240 	udelay(50);
4241 }
4242 
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.  MEC2 only exists on Kaveri and
 * receives the same image as MEC1.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* MECs must be halted while their ucode RAM is written */
	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
4279 
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.  Currently just unhalts the MECs;
 * queue programming happens in cik_cp_compute_resume().
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4294 
/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Halts the compute MEs, then unpins and frees the MQD buffer
 * object of each of the two driver-owned compute rings.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			/* best effort: unpin/free even if the reserve failed */
			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}
4328 
/* Tear down the MEC HPD EOP buffer object allocated by cik_mec_init(). */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		/* best effort: unpin/free even if the reserve failed */
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4344 
4345 #define MEC_HPD_SIZE 2048
4346 
/* Set up the MEC topology counts and allocate, pin, and zero the
 * HPD EOP buffer (MEC_HPD_SIZE * 2 bytes per pipe) in GTT.
 * Returns 0 on success, negative error code on failure.
 *
 * NOTE(review): the warning strings below say "HDP EOP" while the
 * object is named hpd_eop — looks like a typo in the messages;
 * confirm before changing user-visible log text. */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4402 
/* Shadow of the CP_HQD_*/CP_MQD_* register block for one hardware
 * queue descriptor; field names mirror the corresponding registers
 * programmed in cik_cp_compute_resume(). */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4441 
/* Memory Queue Descriptor for a Bonaire-class compute queue.  One of
 * these lives in a GTT buffer object per compute ring; its GPU address
 * is programmed into CP_MQD_BASE_ADDR in cik_cp_compute_resume(). */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* per-queue register shadow, see struct hqd_registers */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4469 
4470 /**
4471  * cik_cp_compute_resume - setup the compute queue registers
4472  *
4473  * @rdev: radeon_device pointer
4474  *
4475  * Program the compute queues and test them to make sure they
4476  * are working.
4477  * Returns 0 for success, error for failure.
4478  */
4479 static int cik_cp_compute_resume(struct radeon_device *rdev)
4480 {
4481 	int r, i, idx;
4482 	u32 tmp;
4483 	bool use_doorbell = true;
4484 	u64 hqd_gpu_addr;
4485 	u64 mqd_gpu_addr;
4486 	u64 eop_gpu_addr;
4487 	u64 wb_gpu_addr;
4488 	u32 *buf;
4489 	struct bonaire_mqd *mqd;
4490 
4491 	r = cik_cp_compute_start(rdev);
4492 	if (r)
4493 		return r;
4494 
4495 	/* fix up chicken bits */
4496 	tmp = RREG32(CP_CPF_DEBUG);
4497 	tmp |= (1 << 23);
4498 	WREG32(CP_CPF_DEBUG, tmp);
4499 
4500 	/* init the pipes */
4501 	mutex_lock(&rdev->srbm_mutex);
4502 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4503 		int me = (i < 4) ? 1 : 2;
4504 		int pipe = (i < 4) ? i : (i - 4);
4505 
4506 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4507 
4508 		cik_srbm_select(rdev, me, pipe, 0, 0);
4509 
4510 		/* write the EOP addr */
4511 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4512 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4513 
4514 		/* set the VMID assigned */
4515 		WREG32(CP_HPD_EOP_VMID, 0);
4516 
4517 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4518 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4519 		tmp &= ~EOP_SIZE_MASK;
4520 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4521 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4522 	}
4523 	cik_srbm_select(rdev, 0, 0, 0, 0);
4524 	mutex_unlock(&rdev->srbm_mutex);
4525 
4526 	/* init the queues.  Just two for now. */
4527 	for (i = 0; i < 2; i++) {
4528 		if (i == 0)
4529 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4530 		else
4531 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4532 
4533 		if (rdev->ring[idx].mqd_obj == NULL) {
4534 			r = radeon_bo_create(rdev,
4535 					     sizeof(struct bonaire_mqd),
4536 					     PAGE_SIZE, true,
4537 					     RADEON_GEM_DOMAIN_GTT, NULL,
4538 					     &rdev->ring[idx].mqd_obj);
4539 			if (r) {
4540 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4541 				return r;
4542 			}
4543 		}
4544 
4545 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4546 		if (unlikely(r != 0)) {
4547 			cik_cp_compute_fini(rdev);
4548 			return r;
4549 		}
4550 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4551 				  &mqd_gpu_addr);
4552 		if (r) {
4553 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4554 			cik_cp_compute_fini(rdev);
4555 			return r;
4556 		}
4557 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4558 		if (r) {
4559 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4560 			cik_cp_compute_fini(rdev);
4561 			return r;
4562 		}
4563 
4564 		/* init the mqd struct */
4565 		memset(buf, 0, sizeof(struct bonaire_mqd));
4566 
4567 		mqd = (struct bonaire_mqd *)buf;
4568 		mqd->header = 0xC0310800;
4569 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4570 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4571 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4572 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4573 
4574 		mutex_lock(&rdev->srbm_mutex);
4575 		cik_srbm_select(rdev, rdev->ring[idx].me,
4576 				rdev->ring[idx].pipe,
4577 				rdev->ring[idx].queue, 0);
4578 
4579 		/* disable wptr polling */
4580 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4581 		tmp &= ~WPTR_POLL_EN;
4582 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4583 
4584 		/* enable doorbell? */
4585 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4586 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4587 		if (use_doorbell)
4588 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4589 		else
4590 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4591 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4592 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4593 
4594 		/* disable the queue if it's active */
4595 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4596 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4597 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4598 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4599 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4600 			for (i = 0; i < rdev->usec_timeout; i++) {
4601 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4602 					break;
4603 				udelay(1);
4604 			}
4605 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4606 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4607 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4608 		}
4609 
4610 		/* set the pointer to the MQD */
4611 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4612 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4613 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4614 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4615 		/* set MQD vmid to 0 */
4616 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4617 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4618 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4619 
4620 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4621 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4622 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4623 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4624 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4625 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4626 
4627 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4628 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4629 		mqd->queue_state.cp_hqd_pq_control &=
4630 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4631 
4632 		mqd->queue_state.cp_hqd_pq_control |=
4633 			order_base_2(rdev->ring[idx].ring_size / 8);
4634 		mqd->queue_state.cp_hqd_pq_control |=
4635 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4636 #ifdef __BIG_ENDIAN
4637 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4638 #endif
4639 		mqd->queue_state.cp_hqd_pq_control &=
4640 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4641 		mqd->queue_state.cp_hqd_pq_control |=
4642 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4643 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4644 
4645 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4646 		if (i == 0)
4647 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4648 		else
4649 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4650 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4651 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4652 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4653 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4654 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4655 
4656 		/* set the wb address wether it's enabled or not */
4657 		if (i == 0)
4658 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4659 		else
4660 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4661 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4662 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4663 			upper_32_bits(wb_gpu_addr) & 0xffff;
4664 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4665 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4666 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4667 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4668 
4669 		/* enable the doorbell if requested */
4670 		if (use_doorbell) {
4671 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4672 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4673 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4674 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4675 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4676 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4677 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4678 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4679 
4680 		} else {
4681 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4682 		}
4683 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4684 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4685 
4686 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4687 		rdev->ring[idx].wptr = 0;
4688 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4689 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4690 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4691 
4692 		/* set the vmid for the queue */
4693 		mqd->queue_state.cp_hqd_vmid = 0;
4694 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4695 
4696 		/* activate the queue */
4697 		mqd->queue_state.cp_hqd_active = 1;
4698 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4699 
4700 		cik_srbm_select(rdev, 0, 0, 0, 0);
4701 		mutex_unlock(&rdev->srbm_mutex);
4702 
4703 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4704 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4705 
4706 		rdev->ring[idx].ready = true;
4707 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4708 		if (r)
4709 			rdev->ring[idx].ready = false;
4710 	}
4711 
4712 	return 0;
4713 }
4714 
/**
 * cik_cp_enable - enable/disable both CP blocks
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the command processors
 *
 * Forwards the enable flag to the gfx CP and the compute CP
 * (MEC) helpers (CIK).
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4720 
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx CP ucode first, then the compute CP ucode; the
 * compute load is skipped if the gfx load failed.
 * Returns 0 on success, or the first non-zero error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	ret = cik_cp_gfx_load_microcode(rdev);
	if (!ret)
		ret = cik_cp_compute_load_microcode(rdev);

	return ret;
}
4734 
/**
 * cik_cp_fini - tear down both CP blocks
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the gfx CP and then the compute CP (MEC) state (CIK).
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4740 
4741 static int cik_cp_resume(struct radeon_device *rdev)
4742 {
4743 	int r;
4744 
4745 	cik_enable_gui_idle_interrupt(rdev, false);
4746 
4747 	r = cik_cp_load_microcode(rdev);
4748 	if (r)
4749 		return r;
4750 
4751 	r = cik_cp_gfx_resume(rdev);
4752 	if (r)
4753 		return r;
4754 	r = cik_cp_compute_resume(rdev);
4755 	if (r)
4756 		return r;
4757 
4758 	cik_enable_gui_idle_interrupt(rdev, true);
4759 
4760 	return 0;
4761 }
4762 
/**
 * cik_print_gpu_status_regs - dump the GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Prints the GRBM/SRBM, SDMA and CP status registers to the kernel
 * log; used before and after a GPU soft reset to help diagnose
 * which blocks are hung (CIK).
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	/* both SDMA engines share one register layout at different offsets */
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4802 
4803 /**
4804  * cik_gpu_check_soft_reset - check which blocks are busy
4805  *
4806  * @rdev: radeon_device pointer
4807  *
4808  * Check which blocks are busy and return the relevant reset
4809  * mask to be used by cik_gpu_soft_reset().
4810  * Returns a mask of the blocks to be reset.
4811  */
4812 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4813 {
4814 	u32 reset_mask = 0;
4815 	u32 tmp;
4816 
4817 	/* GRBM_STATUS */
4818 	tmp = RREG32(GRBM_STATUS);
4819 	if (tmp & (PA_BUSY | SC_BUSY |
4820 		   BCI_BUSY | SX_BUSY |
4821 		   TA_BUSY | VGT_BUSY |
4822 		   DB_BUSY | CB_BUSY |
4823 		   GDS_BUSY | SPI_BUSY |
4824 		   IA_BUSY | IA_BUSY_NO_DMA))
4825 		reset_mask |= RADEON_RESET_GFX;
4826 
4827 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4828 		reset_mask |= RADEON_RESET_CP;
4829 
4830 	/* GRBM_STATUS2 */
4831 	tmp = RREG32(GRBM_STATUS2);
4832 	if (tmp & RLC_BUSY)
4833 		reset_mask |= RADEON_RESET_RLC;
4834 
4835 	/* SDMA0_STATUS_REG */
4836 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4837 	if (!(tmp & SDMA_IDLE))
4838 		reset_mask |= RADEON_RESET_DMA;
4839 
4840 	/* SDMA1_STATUS_REG */
4841 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4842 	if (!(tmp & SDMA_IDLE))
4843 		reset_mask |= RADEON_RESET_DMA1;
4844 
4845 	/* SRBM_STATUS2 */
4846 	tmp = RREG32(SRBM_STATUS2);
4847 	if (tmp & SDMA_BUSY)
4848 		reset_mask |= RADEON_RESET_DMA;
4849 
4850 	if (tmp & SDMA1_BUSY)
4851 		reset_mask |= RADEON_RESET_DMA1;
4852 
4853 	/* SRBM_STATUS */
4854 	tmp = RREG32(SRBM_STATUS);
4855 
4856 	if (tmp & IH_BUSY)
4857 		reset_mask |= RADEON_RESET_IH;
4858 
4859 	if (tmp & SEM_BUSY)
4860 		reset_mask |= RADEON_RESET_SEM;
4861 
4862 	if (tmp & GRBM_RQ_PENDING)
4863 		reset_mask |= RADEON_RESET_GRBM;
4864 
4865 	if (tmp & VMC_BUSY)
4866 		reset_mask |= RADEON_RESET_VMC;
4867 
4868 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4869 		   MCC_BUSY | MCD_BUSY))
4870 		reset_mask |= RADEON_RESET_MC;
4871 
4872 	if (evergreen_is_display_hung(rdev))
4873 		reset_mask |= RADEON_RESET_DISPLAY;
4874 
4875 	/* Skip MC reset as it's mostly likely not hung, just busy */
4876 	if (reset_mask & RADEON_RESET_MC) {
4877 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4878 		reset_mask &= ~RADEON_RESET_MC;
4879 	}
4880 
4881 	return reset_mask;
4882 }
4883 
4884 /**
4885  * cik_gpu_soft_reset - soft reset GPU
4886  *
4887  * @rdev: radeon_device pointer
4888  * @reset_mask: mask of which blocks to reset
4889  *
4890  * Soft reset the blocks specified in @reset_mask.
4891  */
4892 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4893 {
4894 	struct evergreen_mc_save save;
4895 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4896 	u32 tmp;
4897 
4898 	if (reset_mask == 0)
4899 		return;
4900 
4901 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4902 
4903 	cik_print_gpu_status_regs(rdev);
4904 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4905 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4906 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4907 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4908 
4909 	/* disable CG/PG */
4910 	cik_fini_pg(rdev);
4911 	cik_fini_cg(rdev);
4912 
4913 	/* stop the rlc */
4914 	cik_rlc_stop(rdev);
4915 
4916 	/* Disable GFX parsing/prefetching */
4917 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4918 
4919 	/* Disable MEC parsing/prefetching */
4920 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4921 
4922 	if (reset_mask & RADEON_RESET_DMA) {
4923 		/* sdma0 */
4924 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4925 		tmp |= SDMA_HALT;
4926 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4927 	}
4928 	if (reset_mask & RADEON_RESET_DMA1) {
4929 		/* sdma1 */
4930 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4931 		tmp |= SDMA_HALT;
4932 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4933 	}
4934 
4935 	evergreen_mc_stop(rdev, &save);
4936 	if (evergreen_mc_wait_for_idle(rdev)) {
4937 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4938 	}
4939 
4940 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4941 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4942 
4943 	if (reset_mask & RADEON_RESET_CP) {
4944 		grbm_soft_reset |= SOFT_RESET_CP;
4945 
4946 		srbm_soft_reset |= SOFT_RESET_GRBM;
4947 	}
4948 
4949 	if (reset_mask & RADEON_RESET_DMA)
4950 		srbm_soft_reset |= SOFT_RESET_SDMA;
4951 
4952 	if (reset_mask & RADEON_RESET_DMA1)
4953 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4954 
4955 	if (reset_mask & RADEON_RESET_DISPLAY)
4956 		srbm_soft_reset |= SOFT_RESET_DC;
4957 
4958 	if (reset_mask & RADEON_RESET_RLC)
4959 		grbm_soft_reset |= SOFT_RESET_RLC;
4960 
4961 	if (reset_mask & RADEON_RESET_SEM)
4962 		srbm_soft_reset |= SOFT_RESET_SEM;
4963 
4964 	if (reset_mask & RADEON_RESET_IH)
4965 		srbm_soft_reset |= SOFT_RESET_IH;
4966 
4967 	if (reset_mask & RADEON_RESET_GRBM)
4968 		srbm_soft_reset |= SOFT_RESET_GRBM;
4969 
4970 	if (reset_mask & RADEON_RESET_VMC)
4971 		srbm_soft_reset |= SOFT_RESET_VMC;
4972 
4973 	if (!(rdev->flags & RADEON_IS_IGP)) {
4974 		if (reset_mask & RADEON_RESET_MC)
4975 			srbm_soft_reset |= SOFT_RESET_MC;
4976 	}
4977 
4978 	if (grbm_soft_reset) {
4979 		tmp = RREG32(GRBM_SOFT_RESET);
4980 		tmp |= grbm_soft_reset;
4981 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4982 		WREG32(GRBM_SOFT_RESET, tmp);
4983 		tmp = RREG32(GRBM_SOFT_RESET);
4984 
4985 		udelay(50);
4986 
4987 		tmp &= ~grbm_soft_reset;
4988 		WREG32(GRBM_SOFT_RESET, tmp);
4989 		tmp = RREG32(GRBM_SOFT_RESET);
4990 	}
4991 
4992 	if (srbm_soft_reset) {
4993 		tmp = RREG32(SRBM_SOFT_RESET);
4994 		tmp |= srbm_soft_reset;
4995 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4996 		WREG32(SRBM_SOFT_RESET, tmp);
4997 		tmp = RREG32(SRBM_SOFT_RESET);
4998 
4999 		udelay(50);
5000 
5001 		tmp &= ~srbm_soft_reset;
5002 		WREG32(SRBM_SOFT_RESET, tmp);
5003 		tmp = RREG32(SRBM_SOFT_RESET);
5004 	}
5005 
5006 	/* Wait a little for things to settle down */
5007 	udelay(50);
5008 
5009 	evergreen_mc_resume(rdev, &save);
5010 	udelay(50);
5011 
5012 	cik_print_gpu_status_regs(rdev);
5013 }
5014 
/* GMCON register state saved across a KV/KB pci config reset;
 * filled by kv_save_regs_for_reset() and written back by
 * kv_restore_regs_for_reset().
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5020 
/**
 * kv_save_regs_for_reset - save GMCON regs before a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: structure receiving the saved register values
 *
 * Saves GMCON_RENG_EXECUTE/MISC/MISC3, then clears the
 * execute-on-power-up, execute-on-reg-update and stutter-enable
 * bits so they stay off across the reset (KV/KB only).
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5032 
5033 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5034 				      struct kv_reset_save_regs *save)
5035 {
5036 	int i;
5037 
5038 	WREG32(GMCON_PGFSM_WRITE, 0);
5039 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5040 
5041 	for (i = 0; i < 5; i++)
5042 		WREG32(GMCON_PGFSM_WRITE, 0);
5043 
5044 	WREG32(GMCON_PGFSM_WRITE, 0);
5045 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5046 
5047 	for (i = 0; i < 5; i++)
5048 		WREG32(GMCON_PGFSM_WRITE, 0);
5049 
5050 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5051 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5052 
5053 	for (i = 0; i < 5; i++)
5054 		WREG32(GMCON_PGFSM_WRITE, 0);
5055 
5056 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5057 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5058 
5059 	for (i = 0; i < 5; i++)
5060 		WREG32(GMCON_PGFSM_WRITE, 0);
5061 
5062 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5063 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5064 
5065 	for (i = 0; i < 5; i++)
5066 		WREG32(GMCON_PGFSM_WRITE, 0);
5067 
5068 	WREG32(GMCON_PGFSM_WRITE, 0);
5069 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5070 
5071 	for (i = 0; i < 5; i++)
5072 		WREG32(GMCON_PGFSM_WRITE, 0);
5073 
5074 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5075 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5076 
5077 	for (i = 0; i < 5; i++)
5078 		WREG32(GMCON_PGFSM_WRITE, 0);
5079 
5080 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5081 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5082 
5083 	for (i = 0; i < 5; i++)
5084 		WREG32(GMCON_PGFSM_WRITE, 0);
5085 
5086 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5087 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5088 
5089 	for (i = 0; i < 5; i++)
5090 		WREG32(GMCON_PGFSM_WRITE, 0);
5091 
5092 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5093 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5094 
5095 	for (i = 0; i < 5; i++)
5096 		WREG32(GMCON_PGFSM_WRITE, 0);
5097 
5098 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5099 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5100 
5101 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5102 	WREG32(GMCON_MISC, save->gmcon_misc);
5103 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5104 }
5105 
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts all engines, stops the MC, saves GMCON state on APUs,
 * disables bus mastering and triggers a pci config reset.  Waits
 * for CONFIG_MEMSIZE to read back sanely as a sign the asic came
 * out of reset, then restores the GMCON state on APUs (CIK).
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* APUs keep GMCON state across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads
	 * 0xffffffff while the asic is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5168 
5169 /**
5170  * cik_asic_reset - soft reset GPU
5171  *
5172  * @rdev: radeon_device pointer
5173  *
5174  * Look up which blocks are hung and attempt
5175  * to reset them.
5176  * Returns 0 for success.
5177  */
5178 int cik_asic_reset(struct radeon_device *rdev)
5179 {
5180 	u32 reset_mask;
5181 
5182 	reset_mask = cik_gpu_check_soft_reset(rdev);
5183 
5184 	if (reset_mask)
5185 		r600_set_bios_scratch_engine_hung(rdev, true);
5186 
5187 	/* try soft reset */
5188 	cik_gpu_soft_reset(rdev, reset_mask);
5189 
5190 	reset_mask = cik_gpu_check_soft_reset(rdev);
5191 
5192 	/* try pci config reset */
5193 	if (reset_mask && radeon_hard_reset)
5194 		cik_gpu_pci_config_reset(rdev);
5195 
5196 	reset_mask = cik_gpu_check_soft_reset(rdev);
5197 
5198 	if (!reset_mask)
5199 		r600_set_bios_scratch_engine_hung(rdev, false);
5200 
5201 	return 0;
5202 }
5203 
5204 /**
5205  * cik_gfx_is_lockup - check if the 3D engine is locked up
5206  *
5207  * @rdev: radeon_device pointer
5208  * @ring: radeon_ring structure holding ring information
5209  *
5210  * Check if the 3D engine is locked up (CIK).
5211  * Returns true if the engine is locked, false if not.
5212  */
5213 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5214 {
5215 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5216 
5217 	if (!(reset_mask & (RADEON_RESET_GFX |
5218 			    RADEON_RESET_COMPUTE |
5219 			    RADEON_RESET_CP))) {
5220 		radeon_ring_lockup_update(rdev, ring);
5221 		return false;
5222 	}
5223 	return radeon_ring_test_lockup(rdev, ring);
5224 }
5225 
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* NOTE(review): 0x2c14..0x2c24 look like per-surface HDP regs,
	 * 32 sets with 0x18 stride - carried over from vendor code */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the upper 16 bits, start in the lower,
	 * both in 16MB units (>> 24) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture disabled (base 0, top < bot) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5282 
5283 /**
5284  * cik_mc_init - initialize the memory controller driver params
5285  *
5286  * @rdev: radeon_device pointer
5287  *
5288  * Look up the amount of vram, vram width, and decide how to place
5289  * vram and gart within the GPU's physical address space (CIK).
5290  * Returns 0 for success.
5291  */
5292 static int cik_mc_init(struct radeon_device *rdev)
5293 {
5294 	u32 tmp;
5295 	int chansize, numchan;
5296 
5297 	/* Get VRAM informations */
5298 	rdev->mc.vram_is_ddr = true;
5299 	tmp = RREG32(MC_ARB_RAMCFG);
5300 	if (tmp & CHANSIZE_MASK) {
5301 		chansize = 64;
5302 	} else {
5303 		chansize = 32;
5304 	}
5305 	tmp = RREG32(MC_SHARED_CHMAP);
5306 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5307 	case 0:
5308 	default:
5309 		numchan = 1;
5310 		break;
5311 	case 1:
5312 		numchan = 2;
5313 		break;
5314 	case 2:
5315 		numchan = 4;
5316 		break;
5317 	case 3:
5318 		numchan = 8;
5319 		break;
5320 	case 4:
5321 		numchan = 3;
5322 		break;
5323 	case 5:
5324 		numchan = 6;
5325 		break;
5326 	case 6:
5327 		numchan = 10;
5328 		break;
5329 	case 7:
5330 		numchan = 12;
5331 		break;
5332 	case 8:
5333 		numchan = 16;
5334 		break;
5335 	}
5336 	rdev->mc.vram_width = numchan * chansize;
5337 	/* Could aper size report 0 ? */
5338 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5339 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5340 	/* size in MB on si */
5341 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5342 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5343 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5344 	si_vram_gtt_location(rdev, &rdev->mc);
5345 	radeon_update_bandwidth_info(rdev);
5346 
5347 	return 0;
5348 }
5349 
5350 /*
5351  * GART
5352  * VMID 0 is the physical GPU addresses as used by the kernel.
5353  * VMIDs 1-15 are used for userspace clients and are handled
5354  * by the radeon vm/hsa code.
5355  */
5356 /**
5357  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5358  *
5359  * @rdev: radeon_device pointer
5360  *
5361  * Flush the TLB for the VMID 0 page table (CIK).
5362  */
5363 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5364 {
5365 	/* flush hdp cache */
5366 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5367 
5368 	/* bits 0-15 are the VM contexts0-15 */
5369 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5370 }
5371 
5372 /**
5373  * cik_pcie_gart_enable - gart enable
5374  *
5375  * @rdev: radeon_device pointer
5376  *
5377  * This sets up the TLBs, programs the page tables for VMID0,
5378  * sets up the hw for VMIDs 1-15 which are allocated on
5379  * demand, and sets up the global locations for the LDS, GDS,
5380  * and GPUVM for FSA64 clients (CIK).
5381  * Returns 0 for success, errors for failure.
5382  */
5383 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5384 {
5385 	int r, i;
5386 
5387 	if (rdev->gart.robj == NULL) {
5388 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5389 		return -EINVAL;
5390 	}
5391 	r = radeon_gart_table_vram_pin(rdev);
5392 	if (r)
5393 		return r;
5394 	radeon_gart_restore(rdev);
5395 	/* Setup TLB control */
5396 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5397 	       (0xA << 7) |
5398 	       ENABLE_L1_TLB |
5399 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5400 	       ENABLE_ADVANCED_DRIVER_MODEL |
5401 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5402 	/* Setup L2 cache */
5403 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5404 	       ENABLE_L2_FRAGMENT_PROCESSING |
5405 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5406 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5407 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5408 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5409 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5410 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5411 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5412 	/* setup context0 */
5413 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5414 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5415 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5416 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5417 			(u32)(rdev->dummy_page.addr >> 12));
5418 	WREG32(VM_CONTEXT0_CNTL2, 0);
5419 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5420 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5421 
5422 	WREG32(0x15D4, 0);
5423 	WREG32(0x15D8, 0);
5424 	WREG32(0x15DC, 0);
5425 
5426 	/* empty context1-15 */
5427 	/* FIXME start with 4G, once using 2 level pt switch to full
5428 	 * vm size space
5429 	 */
5430 	/* set vm size, must be a multiple of 4 */
5431 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5432 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5433 	for (i = 1; i < 16; i++) {
5434 		if (i < 8)
5435 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5436 			       rdev->gart.table_addr >> 12);
5437 		else
5438 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5439 			       rdev->gart.table_addr >> 12);
5440 	}
5441 
5442 	/* enable context1-15 */
5443 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5444 	       (u32)(rdev->dummy_page.addr >> 12));
5445 	WREG32(VM_CONTEXT1_CNTL2, 4);
5446 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5447 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5448 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5449 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5450 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5451 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5452 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5453 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5454 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5455 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5456 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5457 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5458 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5459 
5460 	if (rdev->family == CHIP_KAVERI) {
5461 		u32 tmp = RREG32(CHUB_CONTROL);
5462 		tmp &= ~BYPASS_VM;
5463 		WREG32(CHUB_CONTROL, tmp);
5464 	}
5465 
5466 	/* XXX SH_MEM regs */
5467 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5468 	mutex_lock(&rdev->srbm_mutex);
5469 	for (i = 0; i < 16; i++) {
5470 		cik_srbm_select(rdev, 0, 0, 0, i);
5471 		/* CP and shaders */
5472 		WREG32(SH_MEM_CONFIG, 0);
5473 		WREG32(SH_MEM_APE1_BASE, 1);
5474 		WREG32(SH_MEM_APE1_LIMIT, 0);
5475 		WREG32(SH_MEM_BASES, 0);
5476 		/* SDMA GFX */
5477 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5478 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5479 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5480 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5481 		/* XXX SDMA RLC - todo */
5482 	}
5483 	cik_srbm_select(rdev, 0, 0, 0, 0);
5484 	mutex_unlock(&rdev->srbm_mutex);
5485 
5486 	cik_pcie_gart_tlb_flush(rdev);
5487 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5488 		 (unsigned)(rdev->mc.gtt_size >> 20),
5489 		 (unsigned long long)rdev->gart.table_addr);
5490 	rdev->gart.ready = true;
5491 	return 0;
5492 }
5493 
5494 /**
5495  * cik_pcie_gart_disable - gart disable
5496  *
5497  * @rdev: radeon_device pointer
5498  *
5499  * This disables all VM page table (CIK).
5500  */
5501 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5502 {
5503 	/* Disable all tables */
5504 	WREG32(VM_CONTEXT0_CNTL, 0);
5505 	WREG32(VM_CONTEXT1_CNTL, 0);
5506 	/* Setup TLB control */
5507 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5508 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5509 	/* Setup L2 cache */
5510 	WREG32(VM_L2_CNTL,
5511 	       ENABLE_L2_FRAGMENT_PROCESSING |
5512 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5513 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5514 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5515 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5516 	WREG32(VM_L2_CNTL2, 0);
5517 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5518 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5519 	radeon_gart_table_vram_unpin(rdev);
5520 }
5521 
5522 /**
5523  * cik_pcie_gart_fini - vm fini callback
5524  *
5525  * @rdev: radeon_device pointer
5526  *
5527  * Tears down the driver GART/VM setup (CIK).
5528  */
5529 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5530 {
5531 	cik_pcie_gart_disable(rdev);
5532 	radeon_gart_table_vram_free(rdev);
5533 	radeon_gart_fini(rdev);
5534 }
5535 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5549 
5550 /*
5551  * vm
5552  * VMID 0 is the physical GPU addresses as used by the kernel.
5553  * VMIDs 1-15 are used for userspace clients and are handled
5554  * by the radeon vm/hsa code.
5555  */
5556 /**
5557  * cik_vm_init - cik vm init callback
5558  *
5559  * @rdev: radeon_device pointer
5560  *
5561  * Inits cik specific vm parameters (number of VMs, base of vram for
5562  * VMIDs 1-15) (CIK).
5563  * Returns 0 for success.
5564  */
5565 int cik_vm_init(struct radeon_device *rdev)
5566 {
5567 	/* number of VMs */
5568 	rdev->vm_manager.nvm = 16;
5569 	/* base offset of vram pages */
5570 	if (rdev->flags & RADEON_IS_IGP) {
5571 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5572 		tmp <<= 22;
5573 		rdev->vm_manager.vram_base_offset = tmp;
5574 	} else
5575 		rdev->vm_manager.vram_base_offset = 0;
5576 
5577 	return 0;
5578 }
5579 
5580 /**
5581  * cik_vm_fini - cik vm fini callback
5582  *
5583  * @rdev: radeon_device pointer
5584  *
5585  * Tear down any asic specific VM setup (CIK).
5586  */
5587 void cik_vm_fini(struct radeon_device *rdev)
5588 {
5589 }
5590 
5591 /**
5592  * cik_vm_decode_fault - print human readable fault info
5593  *
5594  * @rdev: radeon_device pointer
5595  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5596  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5597  *
5598  * Print human readable fault information (CIK).
5599  */
5600 static void cik_vm_decode_fault(struct radeon_device *rdev,
5601 				u32 status, u32 addr, u32 mc_client)
5602 {
5603 	u32 mc_id;
5604 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5605 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5606 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5607 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5608 
5609 	if (rdev->family == CHIP_HAWAII)
5610 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5611 	else
5612 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5613 
5614 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5615 	       protections, vmid, addr,
5616 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5617 	       block, mc_client, mc_id);
5618 }
5619 
5620 /**
5621  * cik_vm_flush - cik vm flush using the CP
5622  *
5623  * @rdev: radeon_device pointer
5624  *
5625  * Update the page table base and flush the VM TLB
5626  * using the CP (CIK).
5627  */
5628 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5629 {
5630 	struct radeon_ring *ring = &rdev->ring[ridx];
5631 
5632 	if (vm == NULL)
5633 		return;
5634 
5635 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5636 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5637 				 WRITE_DATA_DST_SEL(0)));
5638 	if (vm->id < 8) {
5639 		radeon_ring_write(ring,
5640 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5641 	} else {
5642 		radeon_ring_write(ring,
5643 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5644 	}
5645 	radeon_ring_write(ring, 0);
5646 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5647 
5648 	/* update SH_MEM_* regs */
5649 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5650 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5651 				 WRITE_DATA_DST_SEL(0)));
5652 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5653 	radeon_ring_write(ring, 0);
5654 	radeon_ring_write(ring, VMID(vm->id));
5655 
5656 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5657 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5658 				 WRITE_DATA_DST_SEL(0)));
5659 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5660 	radeon_ring_write(ring, 0);
5661 
5662 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5663 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5664 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5665 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5666 
5667 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5668 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5669 				 WRITE_DATA_DST_SEL(0)));
5670 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5671 	radeon_ring_write(ring, 0);
5672 	radeon_ring_write(ring, VMID(0));
5673 
5674 	/* HDP flush */
5675 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
5676 
5677 	/* bits 0-15 are the VM contexts0-15 */
5678 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5679 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5680 				 WRITE_DATA_DST_SEL(0)));
5681 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5682 	radeon_ring_write(ring, 0);
5683 	radeon_ring_write(ring, 1 << vm->id);
5684 
5685 	/* compute doesn't have PFP */
5686 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5687 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5688 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5689 		radeon_ring_write(ring, 0x0);
5690 	}
5691 }
5692 
5693 /*
5694  * RLC
5695  * The RLC is a multi-purpose microengine that handles a
5696  * variety of functions, the most important of which is
5697  * the interrupt controller.
5698  */
5699 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5700 					  bool enable)
5701 {
5702 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5703 
5704 	if (enable)
5705 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5706 	else
5707 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5708 	WREG32(CP_INT_CNTL_RING0, tmp);
5709 }
5710 
5711 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5712 {
5713 	u32 tmp;
5714 
5715 	tmp = RREG32(RLC_LB_CNTL);
5716 	if (enable)
5717 		tmp |= LOAD_BALANCE_ENABLE;
5718 	else
5719 		tmp &= ~LOAD_BALANCE_ENABLE;
5720 	WREG32(RLC_LB_CNTL, tmp);
5721 }
5722 
/* Wait (bounded by rdev->usec_timeout per poll loop) for the RLC serdes
 * CU masters on every se/sh, and then the non-CU masters, to report idle.
 * Leaves se/sh selection in broadcast mode on return.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* poll each se/sh combination individually */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast se/sh selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5747 
5748 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5749 {
5750 	u32 tmp;
5751 
5752 	tmp = RREG32(RLC_CNTL);
5753 	if (tmp != rlc)
5754 		WREG32(RLC_CNTL, rlc);
5755 }
5756 
/* Disable the RLC if it is currently enabled and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it later
 * via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded) for the RLC GPM to report not busy */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5780 
/* Request RLC safe mode: send MSG_ENTER_RLC_SAFE_MODE via RLC_GPR_REG2,
 * then wait (bounded) for the gfx power/clock status bits to assert and
 * for the RLC to acknowledge by clearing the REQ bit.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for gfx power and clock status */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to consume the request (REQ cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5801 
5802 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5803 {
5804 	u32 tmp;
5805 
5806 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5807 	WREG32(RLC_GPR_REG2, tmp);
5808 }
5809 
5810 /**
5811  * cik_rlc_stop - stop the RLC ME
5812  *
5813  * @rdev: radeon_device pointer
5814  *
5815  * Halt the RLC ME (MicroEngine) (CIK).
5816  */
5817 static void cik_rlc_stop(struct radeon_device *rdev)
5818 {
5819 	WREG32(RLC_CNTL, 0);
5820 
5821 	cik_enable_gui_idle_interrupt(rdev, false);
5822 
5823 	cik_wait_for_rlc_serdes(rdev);
5824 }
5825 
5826 /**
5827  * cik_rlc_start - start the RLC ME
5828  *
5829  * @rdev: radeon_device pointer
5830  *
5831  * Unhalt the RLC ME (MicroEngine) (CIK).
5832  */
5833 static void cik_rlc_start(struct radeon_device *rdev)
5834 {
5835 	WREG32(RLC_CNTL, RLC_ENABLE);
5836 
5837 	cik_enable_gui_idle_interrupt(rdev, true);
5838 
5839 	udelay(50);
5840 }
5841 
5842 /**
5843  * cik_rlc_resume - setup the RLC hw
5844  *
5845  * @rdev: radeon_device pointer
5846  *
5847  * Initialize the RLC registers, load the ucode,
5848  * and start the RLC (CIK).
5849  * Returns 0 for success, -EINVAL if the ucode is not available.
5850  */
5851 static int cik_rlc_resume(struct radeon_device *rdev)
5852 {
5853 	u32 i, size, tmp;
5854 	const __be32 *fw_data;
5855 
5856 	if (!rdev->rlc_fw)
5857 		return -EINVAL;
5858 
5859 	switch (rdev->family) {
5860 	case CHIP_BONAIRE:
5861 	case CHIP_HAWAII:
5862 	default:
5863 		size = BONAIRE_RLC_UCODE_SIZE;
5864 		break;
5865 	case CHIP_KAVERI:
5866 		size = KV_RLC_UCODE_SIZE;
5867 		break;
5868 	case CHIP_KABINI:
5869 		size = KB_RLC_UCODE_SIZE;
5870 		break;
5871 	case CHIP_MULLINS:
5872 		size = ML_RLC_UCODE_SIZE;
5873 		break;
5874 	}
5875 
5876 	cik_rlc_stop(rdev);
5877 
5878 	/* disable CG */
5879 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5880 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5881 
5882 	si_rlc_reset(rdev);
5883 
5884 	cik_init_pg(rdev);
5885 
5886 	cik_init_cg(rdev);
5887 
5888 	WREG32(RLC_LB_CNTR_INIT, 0);
5889 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5890 
5891 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5892 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5893 	WREG32(RLC_LB_PARAMS, 0x00600408);
5894 	WREG32(RLC_LB_CNTL, 0x80000004);
5895 
5896 	WREG32(RLC_MC_CNTL, 0);
5897 	WREG32(RLC_UCODE_CNTL, 0);
5898 
5899 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5900 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5901 	for (i = 0; i < size; i++)
5902 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5903 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5904 
5905 	/* XXX - find out what chips support lbpw */
5906 	cik_enable_lbpw(rdev, false);
5907 
5908 	if (rdev->family == CHIP_BONAIRE)
5909 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5910 
5911 	cik_rlc_start(rdev);
5912 
5913 	return 0;
5914 }
5915 
/* Enable/disable coarse grain clock gating (CGCG) and coarse grain light
 * sleep (CGLS) for the gfx block via RLC_CGCG_CGLS_CTRL.  The RLC is halted
 * while the serdes are reprogrammed and restored afterwards.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while writing the serdes, restore it below */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): four back-to-back reads, result discarded;
		 * presumably needed to settle/flush before disabling - confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only touch the register if the value actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5951 
/* Enable/disable medium grain clock gating (MGCG) for the gfx block,
 * including the optional CP memory light sleep and CGTS overrides.
 * The RLC is halted around each serdes write sequence and restored.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		/* optionally enable CP memory light sleep first */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override bit (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		/* configure CGTS shader monitoring if supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override bit (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* disable RLC and CP memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* force CGTS overrides on */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6030 
/* MC/ATC/VM hub clock gating control registers toggled as a group by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6043 
6044 static void cik_enable_mc_ls(struct radeon_device *rdev,
6045 			     bool enable)
6046 {
6047 	int i;
6048 	u32 orig, data;
6049 
6050 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6051 		orig = data = RREG32(mc_cg_registers[i]);
6052 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6053 			data |= MC_LS_ENABLE;
6054 		else
6055 			data &= ~MC_LS_ENABLE;
6056 		if (data != orig)
6057 			WREG32(mc_cg_registers[i], data);
6058 	}
6059 }
6060 
6061 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6062 			       bool enable)
6063 {
6064 	int i;
6065 	u32 orig, data;
6066 
6067 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6068 		orig = data = RREG32(mc_cg_registers[i]);
6069 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6070 			data |= MC_CG_ENABLE;
6071 		else
6072 			data &= ~MC_CG_ENABLE;
6073 		if (data != orig)
6074 			WREG32(mc_cg_registers[i], data);
6075 	}
6076 }
6077 
6078 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6079 				 bool enable)
6080 {
6081 	u32 orig, data;
6082 
6083 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6084 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6085 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6086 	} else {
6087 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6088 		data |= 0xff000000;
6089 		if (data != orig)
6090 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6091 
6092 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6093 		data |= 0xff000000;
6094 		if (data != orig)
6095 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6096 	}
6097 }
6098 
6099 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6100 				 bool enable)
6101 {
6102 	u32 orig, data;
6103 
6104 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6105 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6106 		data |= 0x100;
6107 		if (orig != data)
6108 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6109 
6110 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6111 		data |= 0x100;
6112 		if (orig != data)
6113 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6114 	} else {
6115 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6116 		data &= ~0x100;
6117 		if (orig != data)
6118 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6119 
6120 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6121 		data &= ~0x100;
6122 		if (orig != data)
6123 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6124 	}
6125 }
6126 
/* Toggle UVD medium grain clock gating via the UVD context registers and
 * the DCM bit of UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is discarded;
		 * presumably the read matters as a posting read, or this
		 * was intended to be |= 0xfff - confirm before changing */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6152 
6153 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6154 			       bool enable)
6155 {
6156 	u32 orig, data;
6157 
6158 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6159 
6160 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6161 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6162 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6163 	else
6164 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6165 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6166 
6167 	if (orig != data)
6168 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6169 }
6170 
6171 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6172 				bool enable)
6173 {
6174 	u32 orig, data;
6175 
6176 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6177 
6178 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6179 		data &= ~CLOCK_GATING_DIS;
6180 	else
6181 		data |= CLOCK_GATING_DIS;
6182 
6183 	if (orig != data)
6184 		WREG32(HDP_HOST_PATH_CNTL, data);
6185 }
6186 
6187 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6188 			      bool enable)
6189 {
6190 	u32 orig, data;
6191 
6192 	orig = data = RREG32(HDP_MEM_POWER_LS);
6193 
6194 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6195 		data |= HDP_LS_ENABLE;
6196 	else
6197 		data &= ~HDP_LS_ENABLE;
6198 
6199 	if (orig != data)
6200 		WREG32(HDP_MEM_POWER_LS, data);
6201 }
6202 
/* Enable/disable clock gating for the blocks selected in @block
 * (RADEON_CG_BLOCK_* mask).  For gfx the mgcg/cgcg ordering differs
 * between enable and disable; the gui idle interrupt is masked while
 * gfx gating is reprogrammed.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only programmed on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6250 
/* Enable clock gating at init: gfx first, then UVD internal CG (if
 * present), then the remaining blocks in one call.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6265 
/* Disable clock gating at teardown; reverse order of cik_init_cg()
 * (non-gfx blocks first, gfx last).
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6276 
6277 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6278 					  bool enable)
6279 {
6280 	u32 data, orig;
6281 
6282 	orig = data = RREG32(RLC_PG_CNTL);
6283 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6284 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6285 	else
6286 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6287 	if (orig != data)
6288 		WREG32(RLC_PG_CNTL, data);
6289 }
6290 
6291 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6292 					  bool enable)
6293 {
6294 	u32 data, orig;
6295 
6296 	orig = data = RREG32(RLC_PG_CNTL);
6297 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6298 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6299 	else
6300 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6301 	if (orig != data)
6302 		WREG32(RLC_PG_CNTL, data);
6303 }
6304 
6305 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6306 {
6307 	u32 data, orig;
6308 
6309 	orig = data = RREG32(RLC_PG_CNTL);
6310 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6311 		data &= ~DISABLE_CP_PG;
6312 	else
6313 		data |= DISABLE_CP_PG;
6314 	if (orig != data)
6315 		WREG32(RLC_PG_CNTL, data);
6316 }
6317 
6318 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6319 {
6320 	u32 data, orig;
6321 
6322 	orig = data = RREG32(RLC_PG_CNTL);
6323 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6324 		data &= ~DISABLE_GDS_PG;
6325 	else
6326 		data |= DISABLE_GDS_PG;
6327 	if (orig != data)
6328 		WREG32(RLC_PG_CNTL, data);
6329 }
6330 
6331 #define CP_ME_TABLE_SIZE    96
6332 #define CP_ME_TABLE_OFFSET  2048
6333 #define CP_MEC_TABLE_OFFSET 4096
6334 
/* Fill the RLC's CP power gating table buffer from the CE/PFP/ME/MEC
 * firmware images.  Each microengine contributes CP_ME_TABLE_SIZE dwords
 * taken from a fixed offset inside its firmware image; Kaveri has an
 * extra (fifth) MEC entry.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* pick the firmware image and table offset for this ME;
		 * me >= 3 reuses the MEC image (second MEC on Kaveri) */
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		/* firmware is big-endian; store little-endian in the BO */
		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
6372 
/* Toggle gfx coarse grain power gating (GFX_PG_ENABLE) and the RLC
 * automatic power gating (AUTO_PG_EN).
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): read result is discarded; presumably acts
		 * as a posting/flush read - confirm before removing */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6402 
6403 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6404 {
6405 	u32 mask = 0, tmp, tmp1;
6406 	int i;
6407 
6408 	cik_select_se_sh(rdev, se, sh);
6409 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6410 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6411 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6412 
6413 	tmp &= 0xffff0000;
6414 
6415 	tmp |= tmp1;
6416 	tmp >>= 16;
6417 
6418 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6419 		mask <<= 1;
6420 		mask |= 1;
6421 	}
6422 
6423 	return (~tmp) & mask;
6424 }
6425 
6426 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6427 {
6428 	u32 i, j, k, active_cu_number = 0;
6429 	u32 mask, counter, cu_bitmap;
6430 	u32 tmp = 0;
6431 
6432 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6433 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6434 			mask = 1;
6435 			cu_bitmap = 0;
6436 			counter = 0;
6437 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6438 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6439 					if (counter < 2)
6440 						cu_bitmap |= mask;
6441 					counter ++;
6442 				}
6443 				mask <<= 1;
6444 			}
6445 
6446 			active_cu_number += counter;
6447 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6448 		}
6449 	}
6450 
6451 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6452 
6453 	tmp = RREG32(RLC_MAX_PG_CU);
6454 	tmp &= ~MAX_PU_CU_MASK;
6455 	tmp |= MAX_PU_CU(active_cu_number);
6456 	WREG32(RLC_MAX_PG_CU, tmp);
6457 }
6458 
6459 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6460 				       bool enable)
6461 {
6462 	u32 data, orig;
6463 
6464 	orig = data = RREG32(RLC_PG_CNTL);
6465 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6466 		data |= STATIC_PER_CU_PG_ENABLE;
6467 	else
6468 		data &= ~STATIC_PER_CU_PG_ENABLE;
6469 	if (orig != data)
6470 		WREG32(RLC_PG_CNTL, data);
6471 }
6472 
6473 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6474 					bool enable)
6475 {
6476 	u32 data, orig;
6477 
6478 	orig = data = RREG32(RLC_PG_CNTL);
6479 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6480 		data |= DYN_PER_CU_PG_ENABLE;
6481 	else
6482 		data &= ~DYN_PER_CU_PG_ENABLE;
6483 	if (orig != data)
6484 		WREG32(RLC_PG_CNTL, data);
6485 }
6486 
6487 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6488 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6489 
/* Program the RLC scratch area with the clear state descriptor and the
 * save/restore register list, point the RLC at the save/restore and CP
 * table buffers, and tune the power gating delay/threshold registers.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear state descriptor: hi/lo GPU address + size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6538 
/* Enable/disable all gfx power gating features (coarse grain, static and
 * dynamic per-CU medium grain).
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6545 
6546 u32 cik_get_csb_size(struct radeon_device *rdev)
6547 {
6548 	u32 count = 0;
6549 	const struct cs_section_def *sect = NULL;
6550 	const struct cs_extent_def *ext = NULL;
6551 
6552 	if (rdev->rlc.cs_data == NULL)
6553 		return 0;
6554 
6555 	/* begin clear state */
6556 	count += 2;
6557 	/* context control state */
6558 	count += 3;
6559 
6560 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6561 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6562 			if (sect->id == SECT_CONTEXT)
6563 				count += 2 + ext->reg_count;
6564 			else
6565 				return 0;
6566 		}
6567 	}
6568 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6569 	count += 4;
6570 	/* end clear state */
6571 	count += 2;
6572 	/* clear state */
6573 	count += 2;
6574 
6575 	return count;
6576 }
6577 
/* Fill @buffer with the clear state packets (little-endian dwords).
 * The layout must stay in sync with the dword count computed by
 * cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit every context register extent; bail on anything else */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1: per-family values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6642 
/* Enable the supported power gating features at init time (no-op when
 * rdev->pg_flags is zero).
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6657 
/* Disable the power gating features enabled by cik_init_pg() (no-op when
 * rdev->pg_flags is zero).
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6668 
6669 /*
6670  * Interrupts
6671  * Starting with r6xx, interrupts are handled via a ring buffer.
6672  * Ring buffers are areas of GPU accessible memory that the GPU
6673  * writes interrupt vectors into and the host reads vectors out of.
6674  * There is a rptr (read pointer) that determines where the
6675  * host is currently reading, and a wptr (write pointer)
6676  * which determines where the GPU has written.  When the
6677  * pointers are equal, the ring is idle.  When the GPU
6678  * writes vectors to the ring buffer, it increments the
6679  * wptr.  When there is an interrupt, the host then starts
6680  * fetching commands and processing them until the pointers are
6681  * equal again at which point it updates the rptr.
6682  */
6683 
6684 /**
6685  * cik_enable_interrupts - Enable the interrupt ring buffer
6686  *
6687  * @rdev: radeon_device pointer
6688  *
6689  * Enable the interrupt ring buffer (CIK).
6690  */
6691 static void cik_enable_interrupts(struct radeon_device *rdev)
6692 {
6693 	u32 ih_cntl = RREG32(IH_CNTL);
6694 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6695 
6696 	ih_cntl |= ENABLE_INTR;
6697 	ih_rb_cntl |= IH_RB_ENABLE;
6698 	WREG32(IH_CNTL, ih_cntl);
6699 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6700 	rdev->ih.enabled = true;
6701 }
6702 
6703 /**
6704  * cik_disable_interrupts - Disable the interrupt ring buffer
6705  *
6706  * @rdev: radeon_device pointer
6707  *
6708  * Disable the interrupt ring buffer (CIK).
6709  */
6710 static void cik_disable_interrupts(struct radeon_device *rdev)
6711 {
6712 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6713 	u32 ih_cntl = RREG32(IH_CNTL);
6714 
6715 	ih_rb_cntl &= ~IH_RB_ENABLE;
6716 	ih_cntl &= ~ENABLE_INTR;
6717 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6718 	WREG32(IH_CNTL, ih_cntl);
6719 	/* set rptr, wptr to 0 */
6720 	WREG32(IH_RB_RPTR, 0);
6721 	WREG32(IH_RB_WPTR, 0);
6722 	rdev->ih.enabled = false;
6723 	rdev->ih.rptr = 0;
6724 }
6725 
6726 /**
6727  * cik_disable_interrupt_state - Disable all interrupt sources
6728  *
6729  * @rdev: radeon_device pointer
6730  *
6731  * Clear all interrupt enable bits used by the driver (CIK).
6732  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enable bits,
	 * clearing every driver-enabled interrupt source */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap (IB completion) interrupt on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: clear all interrupt enables on both MEs (4 pipes each) */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. - mask per-crtc display interrupts, guarding
	 * on how many crtcs this asic actually has */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip: disable page-flip completion interrupts per crtc */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear the enable bits but keep the configured
	 * hotplug detect polarity for each of the six HPD pins */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6800 
6801 /**
6802  * cik_irq_init - init and enable the interrupt ring
6803  *
6804  * @rdev: radeon_device pointer
6805  *
6806  * Allocate a ring buffer for the interrupt controller,
6807  * enable the RLC, disable interrupts, enable the IH
6808  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6810  * Returns 0 for success, errors for failure.
6811  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs while we reprogram the IH */
	cik_disable_interrupts(rdev);

	/* init rlc; the RLC must be running for interrupt delivery */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* program the ring base address (256-byte aligned, hence >> 8) */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	/* have the GPU write its wptr to host memory when writeback is on */
	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 so the ring starts out empty */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	/* enable bus mastering so the IH can DMA into the ring */
	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6882 
6883 /**
6884  * cik_irq_set - enable/disable interrupt sources
6885  *
6886  * @rdev: radeon_device pointer
6887  *
6888  * Enable interrupt sources on the GPU (vblanks, hpd,
6889  * etc.) (CIK).
6890  * Returns 0 for success, errors for failure.
6891  */
6892 int cik_irq_set(struct radeon_device *rdev)
6893 {
6894 	u32 cp_int_cntl;
6895 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6896 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6897 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6898 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6899 	u32 grbm_int_cntl = 0;
6900 	u32 dma_cntl, dma_cntl1;
6901 	u32 thermal_int;
6902 
6903 	if (!rdev->irq.installed) {
6904 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6905 		return -EINVAL;
6906 	}
6907 	/* don't enable anything if the ih is disabled */
6908 	if (!rdev->ih.enabled) {
6909 		cik_disable_interrupts(rdev);
6910 		/* force the active interrupt state to all disabled */
6911 		cik_disable_interrupt_state(rdev);
6912 		return 0;
6913 	}
6914 
6915 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6916 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6917 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6918 
6919 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6920 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6921 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6922 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6923 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6924 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6925 
6926 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6927 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6928 
6929 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6930 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6931 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6932 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6933 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6934 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6935 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6936 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6937 
6938 	if (rdev->flags & RADEON_IS_IGP)
6939 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6940 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6941 	else
6942 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6943 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6944 
6945 	/* enable CP interrupts on all rings */
6946 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6947 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6948 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6949 	}
6950 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6951 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6952 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6953 		if (ring->me == 1) {
6954 			switch (ring->pipe) {
6955 			case 0:
6956 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6957 				break;
6958 			case 1:
6959 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6960 				break;
6961 			case 2:
6962 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6963 				break;
6964 			case 3:
6965 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6966 				break;
6967 			default:
6968 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6969 				break;
6970 			}
6971 		} else if (ring->me == 2) {
6972 			switch (ring->pipe) {
6973 			case 0:
6974 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6975 				break;
6976 			case 1:
6977 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6978 				break;
6979 			case 2:
6980 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6981 				break;
6982 			case 3:
6983 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6984 				break;
6985 			default:
6986 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6987 				break;
6988 			}
6989 		} else {
6990 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6991 		}
6992 	}
6993 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6994 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6995 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6996 		if (ring->me == 1) {
6997 			switch (ring->pipe) {
6998 			case 0:
6999 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7000 				break;
7001 			case 1:
7002 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7003 				break;
7004 			case 2:
7005 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7006 				break;
7007 			case 3:
7008 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7009 				break;
7010 			default:
7011 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7012 				break;
7013 			}
7014 		} else if (ring->me == 2) {
7015 			switch (ring->pipe) {
7016 			case 0:
7017 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7018 				break;
7019 			case 1:
7020 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7021 				break;
7022 			case 2:
7023 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7024 				break;
7025 			case 3:
7026 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7027 				break;
7028 			default:
7029 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7030 				break;
7031 			}
7032 		} else {
7033 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7034 		}
7035 	}
7036 
7037 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7038 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7039 		dma_cntl |= TRAP_ENABLE;
7040 	}
7041 
7042 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7043 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7044 		dma_cntl1 |= TRAP_ENABLE;
7045 	}
7046 
7047 	if (rdev->irq.crtc_vblank_int[0] ||
7048 	    atomic_read(&rdev->irq.pflip[0])) {
7049 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7050 		crtc1 |= VBLANK_INTERRUPT_MASK;
7051 	}
7052 	if (rdev->irq.crtc_vblank_int[1] ||
7053 	    atomic_read(&rdev->irq.pflip[1])) {
7054 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7055 		crtc2 |= VBLANK_INTERRUPT_MASK;
7056 	}
7057 	if (rdev->irq.crtc_vblank_int[2] ||
7058 	    atomic_read(&rdev->irq.pflip[2])) {
7059 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7060 		crtc3 |= VBLANK_INTERRUPT_MASK;
7061 	}
7062 	if (rdev->irq.crtc_vblank_int[3] ||
7063 	    atomic_read(&rdev->irq.pflip[3])) {
7064 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7065 		crtc4 |= VBLANK_INTERRUPT_MASK;
7066 	}
7067 	if (rdev->irq.crtc_vblank_int[4] ||
7068 	    atomic_read(&rdev->irq.pflip[4])) {
7069 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7070 		crtc5 |= VBLANK_INTERRUPT_MASK;
7071 	}
7072 	if (rdev->irq.crtc_vblank_int[5] ||
7073 	    atomic_read(&rdev->irq.pflip[5])) {
7074 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7075 		crtc6 |= VBLANK_INTERRUPT_MASK;
7076 	}
7077 	if (rdev->irq.hpd[0]) {
7078 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7079 		hpd1 |= DC_HPDx_INT_EN;
7080 	}
7081 	if (rdev->irq.hpd[1]) {
7082 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7083 		hpd2 |= DC_HPDx_INT_EN;
7084 	}
7085 	if (rdev->irq.hpd[2]) {
7086 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7087 		hpd3 |= DC_HPDx_INT_EN;
7088 	}
7089 	if (rdev->irq.hpd[3]) {
7090 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7091 		hpd4 |= DC_HPDx_INT_EN;
7092 	}
7093 	if (rdev->irq.hpd[4]) {
7094 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7095 		hpd5 |= DC_HPDx_INT_EN;
7096 	}
7097 	if (rdev->irq.hpd[5]) {
7098 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7099 		hpd6 |= DC_HPDx_INT_EN;
7100 	}
7101 
7102 	if (rdev->irq.dpm_thermal) {
7103 		DRM_DEBUG("dpm thermal\n");
7104 		if (rdev->flags & RADEON_IS_IGP)
7105 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7106 		else
7107 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7108 	}
7109 
7110 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7111 
7112 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7113 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7114 
7115 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7116 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7117 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7118 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7119 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7120 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7121 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7122 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7123 
7124 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7125 
7126 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7127 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7128 	if (rdev->num_crtc >= 4) {
7129 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7130 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7131 	}
7132 	if (rdev->num_crtc >= 6) {
7133 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7134 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7135 	}
7136 
7137 	if (rdev->num_crtc >= 2) {
7138 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7139 		       GRPH_PFLIP_INT_MASK);
7140 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7141 		       GRPH_PFLIP_INT_MASK);
7142 	}
7143 	if (rdev->num_crtc >= 4) {
7144 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7145 		       GRPH_PFLIP_INT_MASK);
7146 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7147 		       GRPH_PFLIP_INT_MASK);
7148 	}
7149 	if (rdev->num_crtc >= 6) {
7150 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7151 		       GRPH_PFLIP_INT_MASK);
7152 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7153 		       GRPH_PFLIP_INT_MASK);
7154 	}
7155 
7156 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7157 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7158 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7159 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7160 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7161 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7162 
7163 	if (rdev->flags & RADEON_IS_IGP)
7164 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7165 	else
7166 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7167 
7168 	return 0;
7169 }
7170 
7171 /**
7172  * cik_irq_ack - ack interrupt sources
7173  *
7174  * @rdev: radeon_device pointer
7175  *
7176  * Ack interrupt sources on the GPU (vblanks, hpd,
7177  * etc.) (CIK).  Certain interrupts sources are sw
7178  * generated and do not require an explicit ack.
7179  */
7180 static inline void cik_irq_ack(struct radeon_device *rdev)
7181 {
7182 	u32 tmp;
7183 
7184 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7185 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7186 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7187 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7188 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7189 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7190 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7191 
7192 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7193 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7194 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7195 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7196 	if (rdev->num_crtc >= 4) {
7197 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7198 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7199 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7200 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7201 	}
7202 	if (rdev->num_crtc >= 6) {
7203 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7204 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7205 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7206 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7207 	}
7208 
7209 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7210 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7211 		       GRPH_PFLIP_INT_CLEAR);
7212 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7213 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7214 		       GRPH_PFLIP_INT_CLEAR);
7215 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7216 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7217 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7218 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7219 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7220 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7221 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7222 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7223 
7224 	if (rdev->num_crtc >= 4) {
7225 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7226 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7227 			       GRPH_PFLIP_INT_CLEAR);
7228 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7229 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7230 			       GRPH_PFLIP_INT_CLEAR);
7231 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7232 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7233 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7234 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7235 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7236 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7237 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7238 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7239 	}
7240 
7241 	if (rdev->num_crtc >= 6) {
7242 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7243 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7244 			       GRPH_PFLIP_INT_CLEAR);
7245 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7246 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7247 			       GRPH_PFLIP_INT_CLEAR);
7248 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7249 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7250 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7251 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7252 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7253 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7254 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7255 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7256 	}
7257 
7258 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7259 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7260 		tmp |= DC_HPDx_INT_ACK;
7261 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7262 	}
7263 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7264 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7265 		tmp |= DC_HPDx_INT_ACK;
7266 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7267 	}
7268 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7269 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7270 		tmp |= DC_HPDx_INT_ACK;
7271 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7272 	}
7273 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7274 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7275 		tmp |= DC_HPDx_INT_ACK;
7276 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7277 	}
7278 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7279 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7280 		tmp |= DC_HPDx_INT_ACK;
7281 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7282 	}
7283 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7284 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7285 		tmp |= DC_HPDx_INT_ACK;
7286 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7287 	}
7288 }
7289 
7290 /**
7291  * cik_irq_disable - disable interrupts
7292  *
7293  * @rdev: radeon_device pointer
7294  *
7295  * Disable interrupts on the hw (CIK).
7296  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* stop the IH controller and ring buffer */
	cik_disable_interrupts(rdev);
	/* Wait for in-flight interrupts to land, then acknowledge them */
	mdelay(1);
	cik_irq_ack(rdev);
	/* mask every interrupt source the driver had enabled */
	cik_disable_interrupt_state(rdev);
}
7305 
7306 /**
 * cik_irq_suspend - disable interrupts for suspend
7308  *
7309  * @rdev: radeon_device pointer
7310  *
7311  * Disable interrupts and stop the RLC (CIK).
7312  * Used for suspend.
7313  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* disable all interrupt sources and the IH ring */
	cik_irq_disable(rdev);
	/* stop the RLC as well; it is restarted by cik_irq_init() on resume */
	cik_rlc_stop(rdev);
}
7319 
7320 /**
7321  * cik_irq_fini - tear down interrupt support
7322  *
7323  * @rdev: radeon_device pointer
7324  *
7325  * Disable interrupts on the hw and free the IH ring
7326  * buffer (CIK).
7327  * Used for driver unload.
7328  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hardware first, then free the IH ring buffer */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7334 
7335 /**
7336  * cik_get_ih_wptr - get the IH ring buffer wptr
7337  *
7338  * @rdev: radeon_device pointer
7339  *
7340  * Get the IH ring buffer wptr from either the register
7341  * or the writeback memory buffer (CIK).  Also check for
7342  * ring buffer overflow and deal with it.
7343  * Used by cik_irq_process().
7344  * Returns the value of the wptr.
7345  */
7346 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7347 {
7348 	u32 wptr, tmp;
7349 
7350 	if (rdev->wb.enabled)
7351 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7352 	else
7353 		wptr = RREG32(IH_RB_WPTR);
7354 
7355 	if (wptr & RB_OVERFLOW) {
7356 		/* When a ring buffer overflow happen start parsing interrupt
7357 		 * from the last not overwritten vector (wptr + 16). Hopefully
7358 		 * this should allow us to catchup.
7359 		 */
7360 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7361 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7362 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7363 		tmp = RREG32(IH_RB_CNTL);
7364 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7365 		WREG32(IH_RB_CNTL, tmp);
7366 	}
7367 	return (wptr & rdev->ih.ptr_mask);
7368 }
7369 
7370 /*        CIK IV Ring
7371  * Each IV ring entry is 128 bits:
7372  * [7:0]    - interrupt source id
7373  * [31:8]   - reserved
7374  * [59:32]  - interrupt source data
7375  * [63:60]  - reserved
7376  * [71:64]  - RINGID
7377  *            CP:
7378  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7379  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7380  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7381  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7382  *            PIPE_ID - ME0 0=3D
7383  *                    - ME1&2 compute dispatcher (4 pipes each)
7384  *            SDMA:
7385  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7386  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7387  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7388  * [79:72]  - VMID
7389  * [95:80]  - PASID
7390  * [127:96] - reserved
7391  */
7392 /**
7393  * cik_irq_process - interrupt handler
7394  *
7395  * @rdev: radeon_device pointer
7396  *
 * Interrupt handler (CIK).  Walk the IH ring,
7398  * ack interrupts and schedule work to handle
7399  * interrupt events.
7400  * Returns irq process return code.
7401  */
7402 int cik_irq_process(struct radeon_device *rdev)
7403 {
7404 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7405 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7406 	u32 wptr;
7407 	u32 rptr;
7408 	u32 src_id, src_data, ring_id;
7409 	u8 me_id, pipe_id, queue_id;
7410 	u32 ring_index;
7411 	bool queue_hotplug = false;
7412 	bool queue_reset = false;
7413 	u32 addr, status, mc_client;
7414 	bool queue_thermal = false;
7415 
7416 	if (!rdev->ih.enabled || rdev->shutdown)
7417 		return IRQ_NONE;
7418 
7419 	wptr = cik_get_ih_wptr(rdev);
7420 
7421 restart_ih:
7422 	/* is somebody else already processing irqs? */
7423 	if (atomic_xchg(&rdev->ih.lock, 1))
7424 		return IRQ_NONE;
7425 
7426 	rptr = rdev->ih.rptr;
7427 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7428 
7429 	/* Order reading of wptr vs. reading of IH ring data */
7430 	rmb();
7431 
7432 	/* display interrupts */
7433 	cik_irq_ack(rdev);
7434 
7435 	while (rptr != wptr) {
7436 		/* wptr/rptr are in bytes! */
7437 		ring_index = rptr / 4;
7438 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7439 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7440 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7441 
7442 		switch (src_id) {
7443 		case 1: /* D1 vblank/vline */
7444 			switch (src_data) {
7445 			case 0: /* D1 vblank */
7446 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7447 					if (rdev->irq.crtc_vblank_int[0]) {
7448 						drm_handle_vblank(rdev->ddev, 0);
7449 						rdev->pm.vblank_sync = true;
7450 						wake_up(&rdev->irq.vblank_queue);
7451 					}
7452 					if (atomic_read(&rdev->irq.pflip[0]))
7453 						radeon_crtc_handle_flip(rdev, 0);
7454 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7455 					DRM_DEBUG("IH: D1 vblank\n");
7456 				}
7457 				break;
7458 			case 1: /* D1 vline */
7459 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7460 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7461 					DRM_DEBUG("IH: D1 vline\n");
7462 				}
7463 				break;
7464 			default:
7465 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7466 				break;
7467 			}
7468 			break;
7469 		case 2: /* D2 vblank/vline */
7470 			switch (src_data) {
7471 			case 0: /* D2 vblank */
7472 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7473 					if (rdev->irq.crtc_vblank_int[1]) {
7474 						drm_handle_vblank(rdev->ddev, 1);
7475 						rdev->pm.vblank_sync = true;
7476 						wake_up(&rdev->irq.vblank_queue);
7477 					}
7478 					if (atomic_read(&rdev->irq.pflip[1]))
7479 						radeon_crtc_handle_flip(rdev, 1);
7480 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7481 					DRM_DEBUG("IH: D2 vblank\n");
7482 				}
7483 				break;
7484 			case 1: /* D2 vline */
7485 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7486 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7487 					DRM_DEBUG("IH: D2 vline\n");
7488 				}
7489 				break;
7490 			default:
7491 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7492 				break;
7493 			}
7494 			break;
7495 		case 3: /* D3 vblank/vline */
7496 			switch (src_data) {
7497 			case 0: /* D3 vblank */
7498 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7499 					if (rdev->irq.crtc_vblank_int[2]) {
7500 						drm_handle_vblank(rdev->ddev, 2);
7501 						rdev->pm.vblank_sync = true;
7502 						wake_up(&rdev->irq.vblank_queue);
7503 					}
7504 					if (atomic_read(&rdev->irq.pflip[2]))
7505 						radeon_crtc_handle_flip(rdev, 2);
7506 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7507 					DRM_DEBUG("IH: D3 vblank\n");
7508 				}
7509 				break;
7510 			case 1: /* D3 vline */
7511 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7512 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7513 					DRM_DEBUG("IH: D3 vline\n");
7514 				}
7515 				break;
7516 			default:
7517 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7518 				break;
7519 			}
7520 			break;
7521 		case 4: /* D4 vblank/vline */
7522 			switch (src_data) {
7523 			case 0: /* D4 vblank */
7524 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7525 					if (rdev->irq.crtc_vblank_int[3]) {
7526 						drm_handle_vblank(rdev->ddev, 3);
7527 						rdev->pm.vblank_sync = true;
7528 						wake_up(&rdev->irq.vblank_queue);
7529 					}
7530 					if (atomic_read(&rdev->irq.pflip[3]))
7531 						radeon_crtc_handle_flip(rdev, 3);
7532 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7533 					DRM_DEBUG("IH: D4 vblank\n");
7534 				}
7535 				break;
7536 			case 1: /* D4 vline */
7537 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7538 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7539 					DRM_DEBUG("IH: D4 vline\n");
7540 				}
7541 				break;
7542 			default:
7543 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7544 				break;
7545 			}
7546 			break;
7547 		case 5: /* D5 vblank/vline */
7548 			switch (src_data) {
7549 			case 0: /* D5 vblank */
7550 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7551 					if (rdev->irq.crtc_vblank_int[4]) {
7552 						drm_handle_vblank(rdev->ddev, 4);
7553 						rdev->pm.vblank_sync = true;
7554 						wake_up(&rdev->irq.vblank_queue);
7555 					}
7556 					if (atomic_read(&rdev->irq.pflip[4]))
7557 						radeon_crtc_handle_flip(rdev, 4);
7558 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7559 					DRM_DEBUG("IH: D5 vblank\n");
7560 				}
7561 				break;
7562 			case 1: /* D5 vline */
7563 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7564 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7565 					DRM_DEBUG("IH: D5 vline\n");
7566 				}
7567 				break;
7568 			default:
7569 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7570 				break;
7571 			}
7572 			break;
7573 		case 6: /* D6 vblank/vline */
7574 			switch (src_data) {
7575 			case 0: /* D6 vblank */
7576 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7577 					if (rdev->irq.crtc_vblank_int[5]) {
7578 						drm_handle_vblank(rdev->ddev, 5);
7579 						rdev->pm.vblank_sync = true;
7580 						wake_up(&rdev->irq.vblank_queue);
7581 					}
7582 					if (atomic_read(&rdev->irq.pflip[5]))
7583 						radeon_crtc_handle_flip(rdev, 5);
7584 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7585 					DRM_DEBUG("IH: D6 vblank\n");
7586 				}
7587 				break;
7588 			case 1: /* D6 vline */
7589 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7590 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7591 					DRM_DEBUG("IH: D6 vline\n");
7592 				}
7593 				break;
7594 			default:
7595 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7596 				break;
7597 			}
7598 			break;
7599 		case 8: /* D1 page flip */
7600 		case 10: /* D2 page flip */
7601 		case 12: /* D3 page flip */
7602 		case 14: /* D4 page flip */
7603 		case 16: /* D5 page flip */
7604 		case 18: /* D6 page flip */
7605 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7606 			radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7607 			break;
7608 		case 42: /* HPD hotplug */
7609 			switch (src_data) {
7610 			case 0:
7611 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7612 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7613 					queue_hotplug = true;
7614 					DRM_DEBUG("IH: HPD1\n");
7615 				}
7616 				break;
7617 			case 1:
7618 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7619 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7620 					queue_hotplug = true;
7621 					DRM_DEBUG("IH: HPD2\n");
7622 				}
7623 				break;
7624 			case 2:
7625 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7626 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7627 					queue_hotplug = true;
7628 					DRM_DEBUG("IH: HPD3\n");
7629 				}
7630 				break;
7631 			case 3:
7632 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7633 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7634 					queue_hotplug = true;
7635 					DRM_DEBUG("IH: HPD4\n");
7636 				}
7637 				break;
7638 			case 4:
7639 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7640 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7641 					queue_hotplug = true;
7642 					DRM_DEBUG("IH: HPD5\n");
7643 				}
7644 				break;
7645 			case 5:
7646 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7647 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7648 					queue_hotplug = true;
7649 					DRM_DEBUG("IH: HPD6\n");
7650 				}
7651 				break;
7652 			default:
7653 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7654 				break;
7655 			}
7656 			break;
7657 		case 124: /* UVD */
7658 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7659 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7660 			break;
7661 		case 146:
7662 		case 147:
7663 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7664 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7665 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7666 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7667 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7668 				addr);
7669 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7670 				status);
7671 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7672 			/* reset addr and status */
7673 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7674 			break;
7675 		case 167: /* VCE */
7676 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7677 			switch (src_data) {
7678 			case 0:
7679 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7680 				break;
7681 			case 1:
7682 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7683 				break;
7684 			default:
7685 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7686 				break;
7687 			}
7688 			break;
7689 		case 176: /* GFX RB CP_INT */
7690 		case 177: /* GFX IB CP_INT */
7691 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7692 			break;
7693 		case 181: /* CP EOP event */
7694 			DRM_DEBUG("IH: CP EOP\n");
7695 			/* XXX check the bitfield order! */
7696 			me_id = (ring_id & 0x60) >> 5;
7697 			pipe_id = (ring_id & 0x18) >> 3;
7698 			queue_id = (ring_id & 0x7) >> 0;
7699 			switch (me_id) {
7700 			case 0:
7701 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7702 				break;
7703 			case 1:
7704 			case 2:
7705 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7706 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7707 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7708 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7709 				break;
7710 			}
7711 			break;
7712 		case 184: /* CP Privileged reg access */
7713 			DRM_ERROR("Illegal register access in command stream\n");
7714 			/* XXX check the bitfield order! */
7715 			me_id = (ring_id & 0x60) >> 5;
7716 			pipe_id = (ring_id & 0x18) >> 3;
7717 			queue_id = (ring_id & 0x7) >> 0;
7718 			switch (me_id) {
7719 			case 0:
7720 				/* This results in a full GPU reset, but all we need to do is soft
7721 				 * reset the CP for gfx
7722 				 */
7723 				queue_reset = true;
7724 				break;
7725 			case 1:
7726 				/* XXX compute */
7727 				queue_reset = true;
7728 				break;
7729 			case 2:
7730 				/* XXX compute */
7731 				queue_reset = true;
7732 				break;
7733 			}
7734 			break;
7735 		case 185: /* CP Privileged inst */
7736 			DRM_ERROR("Illegal instruction in command stream\n");
7737 			/* XXX check the bitfield order! */
7738 			me_id = (ring_id & 0x60) >> 5;
7739 			pipe_id = (ring_id & 0x18) >> 3;
7740 			queue_id = (ring_id & 0x7) >> 0;
7741 			switch (me_id) {
7742 			case 0:
7743 				/* This results in a full GPU reset, but all we need to do is soft
7744 				 * reset the CP for gfx
7745 				 */
7746 				queue_reset = true;
7747 				break;
7748 			case 1:
7749 				/* XXX compute */
7750 				queue_reset = true;
7751 				break;
7752 			case 2:
7753 				/* XXX compute */
7754 				queue_reset = true;
7755 				break;
7756 			}
7757 			break;
7758 		case 224: /* SDMA trap event */
7759 			/* XXX check the bitfield order! */
7760 			me_id = (ring_id & 0x3) >> 0;
7761 			queue_id = (ring_id & 0xc) >> 2;
7762 			DRM_DEBUG("IH: SDMA trap\n");
7763 			switch (me_id) {
7764 			case 0:
7765 				switch (queue_id) {
7766 				case 0:
7767 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7768 					break;
7769 				case 1:
7770 					/* XXX compute */
7771 					break;
7772 				case 2:
7773 					/* XXX compute */
7774 					break;
7775 				}
7776 				break;
7777 			case 1:
7778 				switch (queue_id) {
7779 				case 0:
7780 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7781 					break;
7782 				case 1:
7783 					/* XXX compute */
7784 					break;
7785 				case 2:
7786 					/* XXX compute */
7787 					break;
7788 				}
7789 				break;
7790 			}
7791 			break;
7792 		case 230: /* thermal low to high */
7793 			DRM_DEBUG("IH: thermal low to high\n");
7794 			rdev->pm.dpm.thermal.high_to_low = false;
7795 			queue_thermal = true;
7796 			break;
7797 		case 231: /* thermal high to low */
7798 			DRM_DEBUG("IH: thermal high to low\n");
7799 			rdev->pm.dpm.thermal.high_to_low = true;
7800 			queue_thermal = true;
7801 			break;
7802 		case 233: /* GUI IDLE */
7803 			DRM_DEBUG("IH: GUI idle\n");
7804 			break;
7805 		case 241: /* SDMA Privileged inst */
7806 		case 247: /* SDMA Privileged inst */
7807 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7808 			/* XXX check the bitfield order! */
7809 			me_id = (ring_id & 0x3) >> 0;
7810 			queue_id = (ring_id & 0xc) >> 2;
7811 			switch (me_id) {
7812 			case 0:
7813 				switch (queue_id) {
7814 				case 0:
7815 					queue_reset = true;
7816 					break;
7817 				case 1:
7818 					/* XXX compute */
7819 					queue_reset = true;
7820 					break;
7821 				case 2:
7822 					/* XXX compute */
7823 					queue_reset = true;
7824 					break;
7825 				}
7826 				break;
7827 			case 1:
7828 				switch (queue_id) {
7829 				case 0:
7830 					queue_reset = true;
7831 					break;
7832 				case 1:
7833 					/* XXX compute */
7834 					queue_reset = true;
7835 					break;
7836 				case 2:
7837 					/* XXX compute */
7838 					queue_reset = true;
7839 					break;
7840 				}
7841 				break;
7842 			}
7843 			break;
7844 		default:
7845 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7846 			break;
7847 		}
7848 
7849 		/* wptr/rptr are in bytes! */
7850 		rptr += 16;
7851 		rptr &= rdev->ih.ptr_mask;
7852 	}
7853 	if (queue_hotplug)
7854 		schedule_work(&rdev->hotplug_work);
7855 	if (queue_reset)
7856 		schedule_work(&rdev->reset_work);
7857 	if (queue_thermal)
7858 		schedule_work(&rdev->pm.dpm.thermal.work);
7859 	rdev->ih.rptr = rptr;
7860 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7861 	atomic_set(&rdev->ih.lock, 0);
7862 
7863 	/* make sure wptr hasn't changed while processing */
7864 	wptr = cik_get_ih_wptr(rdev);
7865 	if (wptr != rptr)
7866 		goto restart_ih;
7867 
7868 	return IRQ_HANDLED;
7869 }
7870 
7871 /*
7872  * startup/shutdown callbacks
7873  */
7874 /**
7875  * cik_startup - program the asic to a functional state
7876  *
7877  * @rdev: radeon_device pointer
7878  *
7879  * Programs the asic to a functional state (CIK).
7880  * Called by cik_init() and cik_resume().
7881  * Returns 0 for success, error for failure.
7882  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load the MC firmware here unless DPM already did it */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the RLC save/restore register list matching the APU family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring we intend to use:
	 * gfx, two compute (CP1/CP2), two SDMA engines
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: any failure just disables its ring (ring_size = 0)
	 * instead of failing the whole startup
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* VCE is optional as well: on failure both VCE rings are disabled */
	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD ring is only initialized if the resume above succeeded
	 * (ring_size stays 0 otherwise)
	 */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = -ENOENT; /* sentinel: "no VCE rings enabled", checked below */

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT) /* -ENOENT means VCE was (intentionally) disabled */
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8114 
8115 /**
8116  * cik_resume - resume the asic to a functional state
8117  *
8118  * @rdev: radeon_device pointer
8119  *
8120  * Programs the asic to a functional state (CIK).
8121  * Called at resume.
8122  * Returns 0 for success, error for failure.
8123  */
8124 int cik_resume(struct radeon_device *rdev)
8125 {
8126 	int r;
8127 
8128 	/* post card */
8129 	atom_asic_init(rdev->mode_info.atom_context);
8130 
8131 	/* init golden registers */
8132 	cik_init_golden_registers(rdev);
8133 
8134 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8135 		radeon_pm_resume(rdev);
8136 
8137 	rdev->accel_working = true;
8138 	r = cik_startup(rdev);
8139 	if (r) {
8140 		DRM_ERROR("cik startup failed on resume\n");
8141 		rdev->accel_working = false;
8142 		return r;
8143 	}
8144 
8145 	return r;
8146 
8147 }
8148 
8149 /**
8150  * cik_suspend - suspend the asic
8151  *
8152  * @rdev: radeon_device pointer
8153  *
8154  * Bring the chip into a state suitable for suspend (CIK).
8155  * Called at suspend.
8156  * Returns 0 for success.
8157  */
int cik_suspend(struct radeon_device *rdev)
{
	/* Ordering matters: quiesce the engines before tearing down the
	 * infrastructure they use (writeback, irqs, GART).
	 */
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the command processor and both SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* suspend the UVD/VCE blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* disable powergating/clockgating, then cut interrupts and GART */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8175 
8176 /* Plan is to move initialization in that function and use
8177  * helper function so that radeon_device_init pretty much
8178  * do nothing more than calling asic specific function. This
8179  * should also allow to remove a bunch of callback function
8180  * like vram_info.
8181  */
8182 /**
8183  * cik_init - asic specific driver and hw init
8184  *
8185  * @rdev: radeon_device pointer
8186  *
8187  * Setup asic specific driver variables and program the hw
8188  * to a functional state (CIK).
8189  * Called at driver startup.
8190  * Returns 0 for success, errors for failure.
8191  */
8192 int cik_init(struct radeon_device *rdev)
8193 {
8194 	struct radeon_ring *ring;
8195 	int r;
8196 
8197 	/* Read BIOS */
8198 	if (!radeon_get_bios(rdev)) {
8199 		if (ASIC_IS_AVIVO(rdev))
8200 			return -EINVAL;
8201 	}
8202 	/* Must be an ATOMBIOS */
8203 	if (!rdev->is_atom_bios) {
8204 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8205 		return -EINVAL;
8206 	}
8207 	r = radeon_atombios_init(rdev);
8208 	if (r)
8209 		return r;
8210 
8211 	/* Post card if necessary */
8212 	if (!radeon_card_posted(rdev)) {
8213 		if (!rdev->bios) {
8214 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8215 			return -EINVAL;
8216 		}
8217 		DRM_INFO("GPU not posted. posting now...\n");
8218 		atom_asic_init(rdev->mode_info.atom_context);
8219 	}
8220 	/* init golden registers */
8221 	cik_init_golden_registers(rdev);
8222 	/* Initialize scratch registers */
8223 	cik_scratch_init(rdev);
8224 	/* Initialize surface registers */
8225 	radeon_surface_init(rdev);
8226 	/* Initialize clocks */
8227 	radeon_get_clock_info(rdev->ddev);
8228 
8229 	/* Fence driver */
8230 	r = radeon_fence_driver_init(rdev);
8231 	if (r)
8232 		return r;
8233 
8234 	/* initialize memory controller */
8235 	r = cik_mc_init(rdev);
8236 	if (r)
8237 		return r;
8238 	/* Memory manager */
8239 	r = radeon_bo_init(rdev);
8240 	if (r)
8241 		return r;
8242 
8243 	if (rdev->flags & RADEON_IS_IGP) {
8244 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8245 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8246 			r = cik_init_microcode(rdev);
8247 			if (r) {
8248 				DRM_ERROR("Failed to load firmware!\n");
8249 				return r;
8250 			}
8251 		}
8252 	} else {
8253 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8254 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8255 		    !rdev->mc_fw) {
8256 			r = cik_init_microcode(rdev);
8257 			if (r) {
8258 				DRM_ERROR("Failed to load firmware!\n");
8259 				return r;
8260 			}
8261 		}
8262 	}
8263 
8264 	/* Initialize power management */
8265 	radeon_pm_init(rdev);
8266 
8267 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8268 	ring->ring_obj = NULL;
8269 	r600_ring_init(rdev, ring, 1024 * 1024);
8270 
8271 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8272 	ring->ring_obj = NULL;
8273 	r600_ring_init(rdev, ring, 1024 * 1024);
8274 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8275 	if (r)
8276 		return r;
8277 
8278 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8279 	ring->ring_obj = NULL;
8280 	r600_ring_init(rdev, ring, 1024 * 1024);
8281 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8282 	if (r)
8283 		return r;
8284 
8285 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8286 	ring->ring_obj = NULL;
8287 	r600_ring_init(rdev, ring, 256 * 1024);
8288 
8289 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8290 	ring->ring_obj = NULL;
8291 	r600_ring_init(rdev, ring, 256 * 1024);
8292 
8293 	r = radeon_uvd_init(rdev);
8294 	if (!r) {
8295 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8296 		ring->ring_obj = NULL;
8297 		r600_ring_init(rdev, ring, 4096);
8298 	}
8299 
8300 	r = radeon_vce_init(rdev);
8301 	if (!r) {
8302 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8303 		ring->ring_obj = NULL;
8304 		r600_ring_init(rdev, ring, 4096);
8305 
8306 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8307 		ring->ring_obj = NULL;
8308 		r600_ring_init(rdev, ring, 4096);
8309 	}
8310 
8311 	rdev->ih.ring_obj = NULL;
8312 	r600_ih_ring_init(rdev, 64 * 1024);
8313 
8314 	r = r600_pcie_gart_init(rdev);
8315 	if (r)
8316 		return r;
8317 
8318 	rdev->accel_working = true;
8319 	r = cik_startup(rdev);
8320 	if (r) {
8321 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8322 		cik_cp_fini(rdev);
8323 		cik_sdma_fini(rdev);
8324 		cik_irq_fini(rdev);
8325 		sumo_rlc_fini(rdev);
8326 		cik_mec_fini(rdev);
8327 		radeon_wb_fini(rdev);
8328 		radeon_ib_pool_fini(rdev);
8329 		radeon_vm_manager_fini(rdev);
8330 		radeon_irq_kms_fini(rdev);
8331 		cik_pcie_gart_fini(rdev);
8332 		rdev->accel_working = false;
8333 	}
8334 
8335 	/* Don't start up if the MC ucode is missing.
8336 	 * The default clocks and voltages before the MC ucode
8337 	 * is loaded are not suffient for advanced operations.
8338 	 */
8339 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8340 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8341 		return -EINVAL;
8342 	}
8343 
8344 	return 0;
8345 }
8346 
8347 /**
8348  * cik_fini - asic specific driver and hw fini
8349  *
8350  * @rdev: radeon_device pointer
8351  *
8352  * Tear down the asic specific driver variables and program the hw
8353  * to an idle state (CIK).
8354  * Called at driver unload.
8355  */
void cik_fini(struct radeon_device *rdev)
{
	/* Tear down everything cik_init()/cik_startup() created; engines
	 * first, then shared infrastructure, finally the BIOS copy.
	 */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL; /* guard against use-after-free */
}
8382 
8383 void dce8_program_fmt(struct drm_encoder *encoder)
8384 {
8385 	struct drm_device *dev = encoder->dev;
8386 	struct radeon_device *rdev = dev->dev_private;
8387 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8388 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8389 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8390 	int bpc = 0;
8391 	u32 tmp = 0;
8392 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8393 
8394 	if (connector) {
8395 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8396 		bpc = radeon_get_monitor_bpc(connector);
8397 		dither = radeon_connector->dither;
8398 	}
8399 
8400 	/* LVDS/eDP FMT is set up by atom */
8401 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8402 		return;
8403 
8404 	/* not needed for analog */
8405 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8406 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8407 		return;
8408 
8409 	if (bpc == 0)
8410 		return;
8411 
8412 	switch (bpc) {
8413 	case 6:
8414 		if (dither == RADEON_FMT_DITHER_ENABLE)
8415 			/* XXX sort out optimal dither settings */
8416 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8417 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8418 		else
8419 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8420 		break;
8421 	case 8:
8422 		if (dither == RADEON_FMT_DITHER_ENABLE)
8423 			/* XXX sort out optimal dither settings */
8424 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8425 				FMT_RGB_RANDOM_ENABLE |
8426 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8427 		else
8428 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8429 		break;
8430 	case 10:
8431 		if (dither == RADEON_FMT_DITHER_ENABLE)
8432 			/* XXX sort out optimal dither settings */
8433 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8434 				FMT_RGB_RANDOM_ENABLE |
8435 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8436 		else
8437 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8438 		break;
8439 	default:
8440 		/* not needed */
8441 		break;
8442 	}
8443 
8444 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8445 }
8446 
8447 /* display watermark setup */
8448 /**
8449  * dce8_line_buffer_adjust - Set up the line buffer
8450  *
8451  * @rdev: radeon_device pointer
8452  * @radeon_crtc: the selected display controller
8453  * @mode: the current display mode on the selected display
8454  * controller
8455  *
8456  * Setup up the line buffer allocation for
8457  * the selected display controller (CIK).
8458  * Returns the line buffer size in pixels.
8459  */
8460 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8461 				   struct radeon_crtc *radeon_crtc,
8462 				   struct drm_display_mode *mode)
8463 {
8464 	u32 tmp, buffer_alloc, i;
8465 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8466 	/*
8467 	 * Line Buffer Setup
8468 	 * There are 6 line buffers, one for each display controllers.
8469 	 * There are 3 partitions per LB. Select the number of partitions
8470 	 * to enable based on the display width.  For display widths larger
8471 	 * than 4096, you need use to use 2 display controllers and combine
8472 	 * them using the stereo blender.
8473 	 */
8474 	if (radeon_crtc->base.enabled && mode) {
8475 		if (mode->crtc_hdisplay < 1920) {
8476 			tmp = 1;
8477 			buffer_alloc = 2;
8478 		} else if (mode->crtc_hdisplay < 2560) {
8479 			tmp = 2;
8480 			buffer_alloc = 2;
8481 		} else if (mode->crtc_hdisplay < 4096) {
8482 			tmp = 0;
8483 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8484 		} else {
8485 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8486 			tmp = 0;
8487 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8488 		}
8489 	} else {
8490 		tmp = 1;
8491 		buffer_alloc = 0;
8492 	}
8493 
8494 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8495 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8496 
8497 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8498 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8499 	for (i = 0; i < rdev->usec_timeout; i++) {
8500 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8501 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8502 			break;
8503 		udelay(1);
8504 	}
8505 
8506 	if (radeon_crtc->base.enabled && mode) {
8507 		switch (tmp) {
8508 		case 0:
8509 		default:
8510 			return 4096 * 2;
8511 		case 1:
8512 			return 1920 * 2;
8513 		case 2:
8514 			return 2560 * 2;
8515 		}
8516 	}
8517 
8518 	/* controller not enabled, so no lb used */
8519 	return 0;
8520 }
8521 
8522 /**
8523  * cik_get_number_of_dram_channels - get the number of dram channels
8524  *
8525  * @rdev: radeon_device pointer
8526  *
8527  * Look up the number of video ram channels (CIK).
8528  * Used for display watermark bandwidth calculations
8529  * Returns the number of dram channels
8530  */
8531 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8532 {
8533 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8534 
8535 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8536 	case 0:
8537 	default:
8538 		return 1;
8539 	case 1:
8540 		return 2;
8541 	case 2:
8542 		return 4;
8543 	case 3:
8544 		return 8;
8545 	case 4:
8546 		return 3;
8547 	case 5:
8548 		return 6;
8549 	case 6:
8550 		return 10;
8551 	case 7:
8552 		return 12;
8553 	case 8:
8554 		return 16;
8555 	}
8556 }
8557 
/* Per-crtc inputs for the dce8_* display watermark/bandwidth helpers below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8573 
8574 /**
8575  * dce8_dram_bandwidth - get the dram bandwidth
8576  *
8577  * @wm: watermark calculation data
8578  *
8579  * Calculate the raw dram bandwidth (CIK).
8580  * Used for display watermark bandwidth calculations
8581  * Returns the dram bandwidth in MBytes/s
8582  */
8583 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8584 {
8585 	/* Calculate raw DRAM Bandwidth */
8586 	fixed20_12 dram_efficiency; /* 0.7 */
8587 	fixed20_12 yclk, dram_channels, bandwidth;
8588 	fixed20_12 a;
8589 
8590 	a.full = dfixed_const(1000);
8591 	yclk.full = dfixed_const(wm->yclk);
8592 	yclk.full = dfixed_div(yclk, a);
8593 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8594 	a.full = dfixed_const(10);
8595 	dram_efficiency.full = dfixed_const(7);
8596 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8597 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8598 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8599 
8600 	return dfixed_trunc(bandwidth);
8601 }
8602 
8603 /**
8604  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8605  *
8606  * @wm: watermark calculation data
8607  *
8608  * Calculate the dram bandwidth used for display (CIK).
8609  * Used for display watermark bandwidth calculations
8610  * Returns the dram bandwidth for display in MBytes/s
8611  */
8612 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8613 {
8614 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8615 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8616 	fixed20_12 yclk, dram_channels, bandwidth;
8617 	fixed20_12 a;
8618 
8619 	a.full = dfixed_const(1000);
8620 	yclk.full = dfixed_const(wm->yclk);
8621 	yclk.full = dfixed_div(yclk, a);
8622 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8623 	a.full = dfixed_const(10);
8624 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8625 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8626 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8627 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8628 
8629 	return dfixed_trunc(bandwidth);
8630 }
8631 
8632 /**
8633  * dce8_data_return_bandwidth - get the data return bandwidth
8634  *
8635  * @wm: watermark calculation data
8636  *
8637  * Calculate the data return bandwidth used for display (CIK).
8638  * Used for display watermark bandwidth calculations
8639  * Returns the data return bandwidth in MBytes/s
8640  */
8641 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8642 {
8643 	/* Calculate the display Data return Bandwidth */
8644 	fixed20_12 return_efficiency; /* 0.8 */
8645 	fixed20_12 sclk, bandwidth;
8646 	fixed20_12 a;
8647 
8648 	a.full = dfixed_const(1000);
8649 	sclk.full = dfixed_const(wm->sclk);
8650 	sclk.full = dfixed_div(sclk, a);
8651 	a.full = dfixed_const(10);
8652 	return_efficiency.full = dfixed_const(8);
8653 	return_efficiency.full = dfixed_div(return_efficiency, a);
8654 	a.full = dfixed_const(32);
8655 	bandwidth.full = dfixed_mul(a, sclk);
8656 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8657 
8658 	return dfixed_trunc(bandwidth);
8659 }
8660 
8661 /**
8662  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8663  *
8664  * @wm: watermark calculation data
8665  *
8666  * Calculate the dmif bandwidth used for display (CIK).
8667  * Used for display watermark bandwidth calculations
8668  * Returns the dmif bandwidth in MBytes/s
8669  */
8670 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8671 {
8672 	/* Calculate the DMIF Request Bandwidth */
8673 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8674 	fixed20_12 disp_clk, bandwidth;
8675 	fixed20_12 a, b;
8676 
8677 	a.full = dfixed_const(1000);
8678 	disp_clk.full = dfixed_const(wm->disp_clk);
8679 	disp_clk.full = dfixed_div(disp_clk, a);
8680 	a.full = dfixed_const(32);
8681 	b.full = dfixed_mul(a, disp_clk);
8682 
8683 	a.full = dfixed_const(10);
8684 	disp_clk_request_efficiency.full = dfixed_const(8);
8685 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8686 
8687 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8688 
8689 	return dfixed_trunc(bandwidth);
8690 }
8691 
8692 /**
8693  * dce8_available_bandwidth - get the min available bandwidth
8694  *
8695  * @wm: watermark calculation data
8696  *
8697  * Calculate the min available bandwidth used for display (CIK).
8698  * Used for display watermark bandwidth calculations
8699  * Returns the min available bandwidth in MBytes/s
8700  */
8701 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8702 {
8703 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8704 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8705 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8706 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8707 
8708 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8709 }
8710 
8711 /**
8712  * dce8_average_bandwidth - get the average available bandwidth
8713  *
8714  * @wm: watermark calculation data
8715  *
8716  * Calculate the average available bandwidth used for display (CIK).
8717  * Used for display watermark bandwidth calculations
8718  * Returns the average available bandwidth in MBytes/s
8719  */
8720 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8721 {
8722 	/* Calculate the display mode Average Bandwidth
8723 	 * DisplayMode should contain the source and destination dimensions,
8724 	 * timing, etc.
8725 	 */
8726 	fixed20_12 bpp;
8727 	fixed20_12 line_time;
8728 	fixed20_12 src_width;
8729 	fixed20_12 bandwidth;
8730 	fixed20_12 a;
8731 
8732 	a.full = dfixed_const(1000);
8733 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8734 	line_time.full = dfixed_div(line_time, a);
8735 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8736 	src_width.full = dfixed_const(wm->src_width);
8737 	bandwidth.full = dfixed_mul(src_width, bpp);
8738 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8739 	bandwidth.full = dfixed_div(bandwidth, line_time);
8740 
8741 	return dfixed_trunc(bandwidth);
8742 }
8743 
8744 /**
8745  * dce8_latency_watermark - get the latency watermark
8746  *
8747  * @wm: watermark calculation data
8748  *
8749  * Calculate the latency watermark (CIK).
8750  * Used for display watermark bandwidth calculations
8751  * Returns the latency watermark in ns
8752  */
8753 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8754 {
8755 	/* First calculate the latency in ns */
8756 	u32 mc_latency = 2000; /* 2000 ns. */
8757 	u32 available_bandwidth = dce8_available_bandwidth(wm);
8758 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8759 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8760 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8761 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8762 		(wm->num_heads * cursor_line_pair_return_time);
8763 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8764 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8765 	u32 tmp, dmif_size = 12288;
8766 	fixed20_12 a, b, c;
8767 
8768 	if (wm->num_heads == 0)
8769 		return 0;
8770 
8771 	a.full = dfixed_const(2);
8772 	b.full = dfixed_const(1);
8773 	if ((wm->vsc.full > a.full) ||
8774 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8775 	    (wm->vtaps >= 5) ||
8776 	    ((wm->vsc.full >= a.full) && wm->interlaced))
8777 		max_src_lines_per_dst_line = 4;
8778 	else
8779 		max_src_lines_per_dst_line = 2;
8780 
8781 	a.full = dfixed_const(available_bandwidth);
8782 	b.full = dfixed_const(wm->num_heads);
8783 	a.full = dfixed_div(a, b);
8784 
8785 	b.full = dfixed_const(mc_latency + 512);
8786 	c.full = dfixed_const(wm->disp_clk);
8787 	b.full = dfixed_div(b, c);
8788 
8789 	c.full = dfixed_const(dmif_size);
8790 	b.full = dfixed_div(c, b);
8791 
8792 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8793 
8794 	b.full = dfixed_const(1000);
8795 	c.full = dfixed_const(wm->disp_clk);
8796 	b.full = dfixed_div(c, b);
8797 	c.full = dfixed_const(wm->bytes_per_pixel);
8798 	b.full = dfixed_mul(b, c);
8799 
8800 	lb_fill_bw = min(tmp, dfixed_trunc(b));
8801 
8802 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8803 	b.full = dfixed_const(1000);
8804 	c.full = dfixed_const(lb_fill_bw);
8805 	b.full = dfixed_div(c, b);
8806 	a.full = dfixed_div(a, b);
8807 	line_fill_time = dfixed_trunc(a);
8808 
8809 	if (line_fill_time < wm->active_time)
8810 		return latency;
8811 	else
8812 		return latency + (line_fill_time - wm->active_time);
8813 
8814 }
8815 
8816 /**
8817  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8818  * average and available dram bandwidth
8819  *
8820  * @wm: watermark calculation data
8821  *
8822  * Check if the display average bandwidth fits in the display
8823  * dram bandwidth (CIK).
8824  * Used for display watermark bandwidth calculations
8825  * Returns true if the display fits, false if not.
8826  */
8827 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8828 {
8829 	if (dce8_average_bandwidth(wm) <=
8830 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8831 		return true;
8832 	else
8833 		return false;
8834 }
8835 
8836 /**
8837  * dce8_average_bandwidth_vs_available_bandwidth - check
8838  * average and available bandwidth
8839  *
8840  * @wm: watermark calculation data
8841  *
8842  * Check if the display average bandwidth fits in the display
8843  * available bandwidth (CIK).
8844  * Used for display watermark bandwidth calculations
8845  * Returns true if the display fits, false if not.
8846  */
8847 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8848 {
8849 	if (dce8_average_bandwidth(wm) <=
8850 	    (dce8_available_bandwidth(wm) / wm->num_heads))
8851 		return true;
8852 	else
8853 		return false;
8854 }
8855 
8856 /**
8857  * dce8_check_latency_hiding - check latency hiding
8858  *
8859  * @wm: watermark calculation data
8860  *
8861  * Check latency hiding (CIK).
8862  * Used for display watermark bandwidth calculations
8863  * Returns true if the display fits, false if not.
8864  */
8865 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8866 {
8867 	u32 lb_partitions = wm->lb_size / wm->src_width;
8868 	u32 line_time = wm->active_time + wm->blank_time;
8869 	u32 latency_tolerant_lines;
8870 	u32 latency_hiding;
8871 	fixed20_12 a;
8872 
8873 	a.full = dfixed_const(1);
8874 	if (wm->vsc.full > a.full)
8875 		latency_tolerant_lines = 1;
8876 	else {
8877 		if (lb_partitions <= (wm->vtaps + 1))
8878 			latency_tolerant_lines = 1;
8879 		else
8880 			latency_tolerant_lines = 2;
8881 	}
8882 
8883 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8884 
8885 	if (dce8_latency_watermark(wm) <= latency_hiding)
8886 		return true;
8887 	else
8888 		return false;
8889 }
8890 
8891 /**
8892  * dce8_program_watermarks - program display watermarks
8893  *
8894  * @rdev: radeon_device pointer
8895  * @radeon_crtc: the selected display controller
8896  * @lb_size: line buffer size
8897  * @num_heads: number of display controllers in use
8898  *
8899  * Calculate and program the display watermarks for the
8900  * selected display controller (CIK).
8901  */
8902 static void dce8_program_watermarks(struct radeon_device *rdev,
8903 				    struct radeon_crtc *radeon_crtc,
8904 				    u32 lb_size, u32 num_heads)
8905 {
8906 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8907 	struct dce8_wm_params wm_low, wm_high;
8908 	u32 pixel_period;
8909 	u32 line_time = 0;
8910 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8911 	u32 tmp, wm_mask;
8912 
8913 	if (radeon_crtc->base.enabled && num_heads && mode) {
8914 		pixel_period = 1000000 / (u32)mode->clock;
8915 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8916 
8917 		/* watermark for high clocks */
8918 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8919 		    rdev->pm.dpm_enabled) {
8920 			wm_high.yclk =
8921 				radeon_dpm_get_mclk(rdev, false) * 10;
8922 			wm_high.sclk =
8923 				radeon_dpm_get_sclk(rdev, false) * 10;
8924 		} else {
8925 			wm_high.yclk = rdev->pm.current_mclk * 10;
8926 			wm_high.sclk = rdev->pm.current_sclk * 10;
8927 		}
8928 
8929 		wm_high.disp_clk = mode->clock;
8930 		wm_high.src_width = mode->crtc_hdisplay;
8931 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8932 		wm_high.blank_time = line_time - wm_high.active_time;
8933 		wm_high.interlaced = false;
8934 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8935 			wm_high.interlaced = true;
8936 		wm_high.vsc = radeon_crtc->vsc;
8937 		wm_high.vtaps = 1;
8938 		if (radeon_crtc->rmx_type != RMX_OFF)
8939 			wm_high.vtaps = 2;
8940 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8941 		wm_high.lb_size = lb_size;
8942 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8943 		wm_high.num_heads = num_heads;
8944 
8945 		/* set for high clocks */
8946 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8947 
8948 		/* possibly force display priority to high */
8949 		/* should really do this at mode validation time... */
8950 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8951 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8952 		    !dce8_check_latency_hiding(&wm_high) ||
8953 		    (rdev->disp_priority == 2)) {
8954 			DRM_DEBUG_KMS("force priority to high\n");
8955 		}
8956 
8957 		/* watermark for low clocks */
8958 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8959 		    rdev->pm.dpm_enabled) {
8960 			wm_low.yclk =
8961 				radeon_dpm_get_mclk(rdev, true) * 10;
8962 			wm_low.sclk =
8963 				radeon_dpm_get_sclk(rdev, true) * 10;
8964 		} else {
8965 			wm_low.yclk = rdev->pm.current_mclk * 10;
8966 			wm_low.sclk = rdev->pm.current_sclk * 10;
8967 		}
8968 
8969 		wm_low.disp_clk = mode->clock;
8970 		wm_low.src_width = mode->crtc_hdisplay;
8971 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8972 		wm_low.blank_time = line_time - wm_low.active_time;
8973 		wm_low.interlaced = false;
8974 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8975 			wm_low.interlaced = true;
8976 		wm_low.vsc = radeon_crtc->vsc;
8977 		wm_low.vtaps = 1;
8978 		if (radeon_crtc->rmx_type != RMX_OFF)
8979 			wm_low.vtaps = 2;
8980 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8981 		wm_low.lb_size = lb_size;
8982 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8983 		wm_low.num_heads = num_heads;
8984 
8985 		/* set for low clocks */
8986 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8987 
8988 		/* possibly force display priority to high */
8989 		/* should really do this at mode validation time... */
8990 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8991 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8992 		    !dce8_check_latency_hiding(&wm_low) ||
8993 		    (rdev->disp_priority == 2)) {
8994 			DRM_DEBUG_KMS("force priority to high\n");
8995 		}
8996 	}
8997 
8998 	/* select wm A */
8999 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9000 	tmp = wm_mask;
9001 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9002 	tmp |= LATENCY_WATERMARK_MASK(1);
9003 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9004 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9005 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9006 		LATENCY_HIGH_WATERMARK(line_time)));
9007 	/* select wm B */
9008 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9009 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9010 	tmp |= LATENCY_WATERMARK_MASK(2);
9011 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9012 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9013 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9014 		LATENCY_HIGH_WATERMARK(line_time)));
9015 	/* restore original selection */
9016 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9017 
9018 	/* save values for DPM */
9019 	radeon_crtc->line_time = line_time;
9020 	radeon_crtc->wm_high = latency_watermark_a;
9021 	radeon_crtc->wm_low = latency_watermark_b;
9022 }
9023 
9024 /**
9025  * dce8_bandwidth_update - program display watermarks
9026  *
9027  * @rdev: radeon_device pointer
9028  *
9029  * Calculate and program the display watermarks and line
9030  * buffer allocation (CIK).
9031  */
9032 void dce8_bandwidth_update(struct radeon_device *rdev)
9033 {
9034 	struct drm_display_mode *mode = NULL;
9035 	u32 num_heads = 0, lb_size;
9036 	int i;
9037 
9038 	radeon_update_display_priority(rdev);
9039 
9040 	for (i = 0; i < rdev->num_crtc; i++) {
9041 		if (rdev->mode_info.crtcs[i]->base.enabled)
9042 			num_heads++;
9043 	}
9044 	for (i = 0; i < rdev->num_crtc; i++) {
9045 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9046 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9047 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9048 	}
9049 }
9050 
9051 /**
9052  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9053  *
9054  * @rdev: radeon_device pointer
9055  *
9056  * Fetches a GPU clock counter snapshot (SI).
9057  * Returns the 64 bit clock counter snapshot.
9058  */
9059 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9060 {
9061 	uint64_t clock;
9062 
9063 	mutex_lock(&rdev->gpu_clock_mutex);
9064 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9065 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9066 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9067 	mutex_unlock(&rdev->gpu_clock_mutex);
9068 	return clock;
9069 }
9070 
9071 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9072                               u32 cntl_reg, u32 status_reg)
9073 {
9074 	int r, i;
9075 	struct atom_clock_dividers dividers;
9076 	uint32_t tmp;
9077 
9078 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9079 					   clock, false, &dividers);
9080 	if (r)
9081 		return r;
9082 
9083 	tmp = RREG32_SMC(cntl_reg);
9084 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9085 	tmp |= dividers.post_divider;
9086 	WREG32_SMC(cntl_reg, tmp);
9087 
9088 	for (i = 0; i < 100; i++) {
9089 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9090 			break;
9091 		mdelay(10);
9092 	}
9093 	if (i == 100)
9094 		return -ETIMEDOUT;
9095 
9096 	return 0;
9097 }
9098 
9099 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9100 {
9101 	int r = 0;
9102 
9103 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9104 	if (r)
9105 		return r;
9106 
9107 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9108 	return r;
9109 }
9110 
9111 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9112 {
9113 	int r, i;
9114 	struct atom_clock_dividers dividers;
9115 	u32 tmp;
9116 
9117 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9118 					   ecclk, false, &dividers);
9119 	if (r)
9120 		return r;
9121 
9122 	for (i = 0; i < 100; i++) {
9123 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9124 			break;
9125 		mdelay(10);
9126 	}
9127 	if (i == 100)
9128 		return -ETIMEDOUT;
9129 
9130 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9131 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9132 	tmp |= dividers.post_divider;
9133 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9134 
9135 	for (i = 0; i < 100; i++) {
9136 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9137 			break;
9138 		mdelay(10);
9139 	}
9140 	if (i == 100)
9141 		return -ETIMEDOUT;
9142 
9143 	return 0;
9144 }
9145 
/*
 * cik_pcie_gen3_enable - train the PCIE link to the highest supported speed
 *
 * Upgrades the link to gen2/gen3 when both the root port and the GPU
 * support it.  Disabled with radeon.pcie_gen2=0 and skipped on IGPs
 * and non-PCIE parts.  For gen3 it re-runs link equalization before
 * requesting the speed change.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL of both ends; HAWD (hardware autonomous
			 * width disable) is set during retraining and the
			 * original values restored afterwards */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to the max detected width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved (1 << 4) | (7 << 9)
				 * fields (compliance-related bits -- verify exact
				 * field names against the PCIe LNKCTL2 layout) */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into LNKCTL2 bits 3:0 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for hardware to clear the bit */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9302 
/*
 * cik_program_aspm - program PCIE ASPM (active state power management)
 *
 * Programs L0s/L1 inactivity timers, PLL power-down in L1, and -- when
 * the root port advertises clock PM -- CLKREQ#-based reference clocking.
 * Disabled with radeon.aspm=0 and skipped on IGP/non-PCIE parts.
 * Note the disable_* policy flags below are currently hard-coded false,
 * so the L0s, L1, PLL-off and CLKREQ paths are all taken.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* the orig != data pattern below avoids redundant register writes */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* set the L0s/L1 inactivity timers; LC_PMI_TO_L1_DIS is cleared
	 * again below when L1 stays enabled */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF PLLs to power down in the L1/TXS2
			 * link states, on both PB0 and PB1 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# support requires the root port to advertise
			 * clock power management in its link capabilities.
			 * NOTE(review): root (bus->self) is not NULL-checked --
			 * confirm it cannot be NULL for a discrete PCIE GPU here */
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch aux clock consumers off the refclk so it
				 * can be gated via CLKREQ# */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: only write the L0s/PMI settings computed above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* drop the L0s inactivity timer if the N_FTS field is
		 * saturated and the link is reversed in both directions */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9450